def sklearn_batch_data(fn):
    """Stream the MLSQL training data to ``fn`` one batch at a time.

    The following keys are looked up in the ``fitParam`` section of
    ``mlsql.params()``:

    * ``batchSize`` -- maximum number of records per batch (default 1000).
    * ``labelSize`` -- forwarded to ``fn`` unchanged (default -1).
    * ``inputCol``  -- key of the feature value in each record
      (default ``"features"``).
    * ``label``     -- key of the label value in each record
      (default ``"label"``).

    Args:
        fn: Callable invoked as ``fn(X, y, label_size)`` for every non-empty
            batch. ``X`` is the list of ``record[inputCol].toArray()``
            results and ``y`` is the list of ``record[label]`` values.
    """
    reader = mlsql.read_data()
    fit_params = mlsql.params()["fitParam"]

    batch_size = int(mlsql.get_param(fit_params, "batchSize", 1000))
    label_size = int(mlsql.get_param(fit_params, "labelSize", -1))
    feature_key = mlsql.get_param(fit_params, "inputCol", "features")
    label_key = mlsql.get_param(fit_params, "label", "label")

    for batch in reader(max_records=batch_size):
        # Empty batches are skipped so that fn never sees empty inputs.
        if len(batch) != 0:
            features = [record[feature_key].toArray() for record in batch]
            labels = [record[label_key] for record in batch]
            fn(features, labels, label_size)
import tensorflow as tf import mlsql_model import mlsql import sys import mlsql_tf rd = mlsql.read_data() p = mlsql.params() fitParams = p["fitParam"] tf.reset_default_graph config = tf.ConfigProto() gpuPercent = float(mlsql.get_param(fitParams, "gpuPercent", -1)) featureSize = int(mlsql.get_param(fitParams, "featureSize", -1)) label_size = int(mlsql.get_param(fitParams, "labelSize", -1)) layer_group = [int(i) for i in mlsql.get_param(fitParams, "layerGroup", "300").split(",")] batch_size = int(mlsql.get_param(fitParams, "batchSize", 32)) epochs = int(mlsql.get_param(fitParams, "epochs", 1)) print_interval = int(mlsql.get_param(fitParams, "printInterval", 1)) input_col = mlsql.get_param(fitParams, "inputCol", "features") label_col = mlsql.get_param(fitParams, "labelCol", "label") tempModelLocalPath = p["internalSystemParam"]["tempModelLocalPath"] if featureSize < 0 or label_size < 0: raise RuntimeError("featureSize or labelSize is required") if gpuPercent > 0:
import tensorflow as tf
import mlsql_model
import mlsql
import sys
import mlsql_tf

# Data reader and the parameters handed to this script by MLSQL.
rd = mlsql.read_data()
p = mlsql.params()
fitParams = p["fitParam"]

# The original referenced tf.reset_default_graph without calling it, which
# is a no-op expression; the call is needed to actually reset the graph.
tf.reset_default_graph()
config = tf.ConfigProto()

gpuPercent = float(mlsql.get_param(fitParams, "gpuPercent", -1))
featureSize = int(mlsql.get_param(fitParams, "featureSize", -1))
wordEmbeddingSize = int(mlsql.get_param(fitParams, "wordEmbeddingSize", -1))
# Floor division keeps sequenceLen an int on Python 3 as well; plain "/"
# yields a float there (on Python 2 ints the two operators agree).
# NOTE(review): presumably the number of embedded tokens per sample -- confirm.
sequenceLen = featureSize // wordEmbeddingSize
label_size = int(mlsql.get_param(fitParams, "labelSize", -1))

# layerGroup and windowGroup are comma-separated lists of integers.
layer_group = [
    int(i) for i in mlsql.get_param(fitParams, "layerGroup", "300").split(",")
]

print_interval = int(mlsql.get_param(fitParams, "printInterval", 1))

window_group = [
    int(i)
    for i in mlsql.get_param(fitParams, "windowGroup", "5,10,15").split(",")
]