def sk_save_model(model):
    """Persist *model* as a pickle under the engine-provided temp path.

    Reads ``tempModelLocalPath`` from mlsql's ``internalSystemParam``
    (falling back to ``/tmp/``), recreates that directory from scratch,
    and writes ``model.pickle``. Protocol 2 is kept so Python-2 readers
    can still load the artifact.
    """
    isp = mlsql.params()["internalSystemParam"]
    # dict.get replaces the `x if k in d else default` conditional (one lookup).
    dir_name = isp.get("tempModelLocalPath", "/tmp/")
    # Start from an empty directory so stale artifacts from a previous
    # run never survive next to the new model.
    if os.path.exists(dir_name):
        shutil.rmtree(dir_name)
    os.makedirs(dir_name)
    with open(os.path.join(dir_name, "model.pickle"), "wb") as f:
        pickle.dump(model, f, protocol=2)
def sk_save_model(model):
    """Serialize *model* to ``<tempModelLocalPath>/model.pickle``.

    The target directory comes from mlsql's ``internalSystemParam`` and is
    wiped and recreated before writing; pickle protocol 2 keeps the file
    readable by Python-2 consumers.
    """
    system_params = mlsql.params()["internalSystemParam"]
    if "tempModelLocalPath" in system_params:
        target_dir = system_params["tempModelLocalPath"]
    else:
        target_dir = "/tmp/"
    # Recreate the directory from scratch so no stale files remain.
    if os.path.exists(target_dir):
        shutil.rmtree(target_dir)
    os.makedirs(target_dir)
    model_file = os.path.join(target_dir, "model.pickle")
    with open(model_file, "wb") as handle:
        pickle.dump(model, handle, protocol=2)
def sklearn_batch_data(fn):
    """Stream training data from mlsql in batches and feed each batch to *fn*.

    *fn* is called as ``fn(X, y, label_size)`` where X is a list of dense
    feature arrays (``.toArray()`` of each row's feature column) and y the
    matching label values. Batch size, label size, and the input/label
    column names all come from mlsql's ``fitParam`` map.
    """
    reader = mlsql.read_data()
    fit_params = mlsql.params()["fitParam"]
    batch_size = int(mlsql.get_param(fit_params, "batchSize", 1000))
    label_size = int(mlsql.get_param(fit_params, "labelSize", -1))
    feature_col = mlsql.get_param(fit_params, "inputCol", "features")
    label_col = mlsql.get_param(fit_params, "label", "label")
    for batch in reader(max_records=batch_size):
        if not batch:
            continue
        features = [row[feature_col].toArray() for row in batch]
        labels = [row[label_col] for row in batch]
        fn(features, labels, label_size)
def sklearn_all_data():
    """Drain the whole mlsql data stream into memory and return ``(X, y)``.

    X is a list of dense feature arrays (``.toArray()`` per row), y the
    matching labels. Column names come from ``fitParam`` (defaults
    ``features`` / ``label``). When ``debug`` is truthy in fitParam, a
    running record count is printed per batch.
    """
    rd = mlsql.read_data()
    fitParams = mlsql.params()["fitParam"]
    X = []
    y = []
    # dict.get replaces the `x if k in d else default` conditionals.
    x_name = fitParams.get("inputCol", "features")
    y_name = fitParams.get("label", "label")
    debug = "debug" in fitParams and bool(fitParams["debug"])
    counter = 0
    for items in rd(max_records=1000):
        item_size = len(items)
        if debug:
            counter += item_size
            print("{} collect data from kafka:{}".format(fitParams["alg"], counter))
        if item_size == 0:
            continue
        # extend() is O(batch); the original `X = X + [...]` rebuilt the
        # whole list every batch, making total collection time quadratic.
        X.extend(item[x_name].toArray() for item in items)
        y.extend(item[y_name] for item in items)
    return X, y
def sklearn_configure_params(clf):
    """Override *clf* hyper-parameters from mlsql's ``fitParam`` map.

    For every estimator parameter present in fitParam, the supplied raw
    value is coerced to the type of the estimator's current default and
    assigned with ``setattr``. Unknown default types pass the raw value
    through unchanged.
    """
    fitParams = mlsql.params()["fitParam"]

    def _coerce(default, raw):
        # Coerce *raw* (usually a string from fitParam) to *default*'s type.
        # bool is excluded from the int branch: the original used
        # `type(v) == int`, which never matched bools, and bool("False")
        # would be truthy anyway.
        if isinstance(default, float):
            return float(raw)
        if isinstance(default, int) and not isinstance(default, bool):
            return int(raw)
        if isinstance(default, list):
            # BUG FIX: the original converted the items of the *default*
            # list (`v`) instead of the supplied value (`convert_v`), so
            # list-typed parameters were silently ignored. Also guard
            # against an empty default list (the original indexed v[0]).
            if default and isinstance(default[0], int):
                return [int(i) for i in raw]
            if default and isinstance(default[0], float):
                return [float(i) for i in raw]
            return raw
        return raw

    for name in clf.get_params():
        if name in fitParams:
            current_default = clf.get_params()[name]
            setattr(clf, name, _coerce(current_default, fitParams[name]))
# Top-level training loop. Relies on names built earlier in this file
# (not visible here): `accurate`, `xent`, `train_step`, `global_step`,
# `input_x`, `input_y`, `sess`, and the mlsql reader `rd`.
tf.summary.scalar("accuracy", accurate)
summ = tf.summary.merge_all()
sess.run(tf.global_variables_initializer())

def trans(i):
    # One-hot encode a binary label.
    # NOTE(review): returns None for any label other than 0/1 — presumably
    # labels are guaranteed binary upstream; confirm against the caller.
    if i == 0:
        return [0, 1]
    if i == 1:
        return [1, 0]

# Pull records two at a time from the mlsql stream and run one train step
# per non-empty batch.
for items in rd(max_records=2):
    X = [item["features"].toArray() for item in items]
    Y = [trans(item["label"]) for item in items]
    if len(X) > 0:
        _, gs = sess.run([train_step, global_step],
                         feed_dict={input_x: X, input_y: Y})
        # Second run re-evaluates metrics on the same batch (extra forward
        # pass) purely for logging.
        [train_accuracy, s, loss] = sess.run([accurate, summ, xent],
                                             feed_dict={input_x: X, input_y: Y})
        print('train_accuracy %g, loss: %g, global step: %d' % (
            train_accuracy, loss, gs))
        sys.stdout.flush()

# Export the trained session to the engine-provided temp model path.
p = mlsql.params()
mlsql_model.save_model(p["internalSystemParam"]["tempModelLocalPath"],
                       sess, input_x, input_y, True)
sess.close()
if __name__ == "__main__":
    # Train an ElasticNet regressor on the wine-quality dataset and pickle
    # it to mlsql's temp model path. alpha / l1_ratio may be passed as
    # argv[1] / argv[2] (default 0.5 each).
    warnings.filterwarnings("ignore")
    np.random.seed(40)

    # Read the wine-quality csv file (make sure you're running this from the root of MLflow!)
    wine_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "wine-quality.csv")
    data = pd.read_csv(wine_path)

    # Split the data into training and test sets. (0.75, 0.25) split.
    train, test = train_test_split(data)

    # The predicted column is "quality" which is a scalar from [3, 9]
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
    test_y = test[["quality"]]

    alpha = float(sys.argv[1]) if len(sys.argv) > 1 else 0.5
    l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5

    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    lr.fit(train_x, train_y)

    isp = mlsql.params()["internalSystemParam"]
    tempModelLocalPath = isp["tempModelLocalPath"]
    if not os.path.exists(tempModelLocalPath):
        os.makedirs(tempModelLocalPath)
    # os.path.join instead of string concatenation for the model path.
    with open(os.path.join(tempModelLocalPath, "model.pkl"), "wb") as f:
        pickle.dump(lr, f)
    # Tail of a metrics helper whose `def` lies outside this chunk
    # (presumably eval_metrics(actual, pred) — confirm upstream).
    return rmse, mae, r2

if __name__ == "__main__":
    # Train an ElasticNet regressor on the wine-quality dataset and pickle
    # it to mlsql's temp model path. alpha / l1_ratio may be passed as
    # argv[1] / argv[2] (default 0.5 each).
    warnings.filterwarnings("ignore")
    np.random.seed(40)

    # Read the wine-quality csv file (make sure you're running this from the root of MLflow!)
    wine_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "wine-quality.csv")
    data = pd.read_csv(wine_path)

    # Split the data into training and test sets. (0.75, 0.25) split.
    train, test = train_test_split(data)

    # The predicted column is "quality" which is a scalar from [3, 9]
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
    test_y = test[["quality"]]

    alpha = float(sys.argv[1]) if len(sys.argv) > 1 else 0.5
    l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5

    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    lr.fit(train_x, train_y)

    # Persist the fitted model to the engine-provided temp model path.
    isp = mlsql.params()["internalSystemParam"]
    tempModelLocalPath = isp["tempModelLocalPath"]
    if not os.path.exists(tempModelLocalPath):
        os.makedirs(tempModelLocalPath)
    with open(tempModelLocalPath + "/model.pkl", "wb") as f:
        pickle.dump(lr, f)
def param(key, value):
    """Return fitParams[*key*] if present, else the fallback *value*.

    NOTE(review): this reads ``"fitParams"`` while sibling helpers in this
    codebase read ``"fitParam"`` (singular) — confirm the key is correct
    for this module before relying on it.
    """
    # dict.get replaces the if/else membership test with a single lookup.
    return mlsql.params()["fitParams"].get(key, value)
import tensorflow as tf
import mlsql_model
import mlsql
import sys
import mlsql_tf

# Module-level setup: read the mlsql data stream and hyper-parameters for
# a text-CNN style model (layer/window groups, embedding geometry).
rd = mlsql.read_data()
p = mlsql.params()
fitParams = p["fitParam"]
# BUG FIX: the original read `tf.reset_default_graph` without calling it —
# a no-op attribute access, so the default graph was never actually reset.
tf.reset_default_graph()
config = tf.ConfigProto()
gpuPercent = float(mlsql.get_param(fitParams, "gpuPercent", -1))
featureSize = int(mlsql.get_param(fitParams, "featureSize", -1))
wordEmbeddingSize = int(mlsql.get_param(fitParams, "wordEmbeddingSize", -1))
# BUG FIX: use floor division — under Python 3 `/` yields a float, and
# sequenceLen presumably feeds tensor shapes, which require ints
# (TODO confirm featureSize is a multiple of wordEmbeddingSize upstream).
sequenceLen = featureSize // wordEmbeddingSize
label_size = int(mlsql.get_param(fitParams, "labelSize", -1))
# Comma-separated config strings parsed into int lists.
layer_group = [
    int(i) for i in mlsql.get_param(fitParams, "layerGroup", "300").split(",")
]
print_interval = int(mlsql.get_param(fitParams, "printInterval", 1))
window_group = [
    int(i) for i in mlsql.get_param(fitParams, "windowGroup", "5,10,15").split(",")
]
def udf(func):
    """Write the serialized *func* payload to the engine's funcPath."""
    import mlsql
    engine_params = mlsql.params()
    target_path = engine_params["systemParam"]["funcPath"]
    write_binary_file(target_path, func)