Beispiel #1
0
def sklearn_batch_data(fn):
    """Stream the MLSQL training data to ``fn`` one batch at a time.

    Options are looked up in ``mlsql.params()["fitParam"]``:

    * ``batchSize`` (default 1000): passed to the reader as ``max_records``.
    * ``labelSize`` (default -1): converted to int and forwarded to ``fn``.
    * ``inputCol`` (default "features"): key of the feature value per record.
    * ``label`` (default "label"): key of the target value per record.

    Args:
        fn: Callable invoked as ``fn(X, y, label_size)`` once per non-empty
            batch. ``X`` is the list of ``record[inputCol].toArray()``
            results and ``y`` the list of ``record[label]`` values.
    """
    read_batches = mlsql.read_data()
    fit_params = mlsql.params()["fitParam"]

    def option(key, default):
        # Thin wrapper so every lookup goes through the same fitParam dict.
        return mlsql.get_param(fit_params, key, default)

    max_records = int(option("batchSize", 1000))
    n_labels = int(option("labelSize", -1))
    feature_key = option("inputCol", "features")
    # NOTE(review): the option key here is "label", not "labelCol" --
    # confirm this is the name callers actually set.
    target_key = option("label", "label")

    for batch in read_batches(max_records=max_records):
        if len(batch) == 0:
            # Empty batches are skipped; fn is never called with no rows.
            continue
        features = [record[feature_key].toArray() for record in batch]
        targets = [record[target_key] for record in batch]
        fn(features, targets, n_labels)
import tensorflow as tf
import mlsql_model
import mlsql
import sys
import mlsql_tf

# Data reader and the full parameter dictionary supplied by MLSQL.
rd = mlsql.read_data()
p = mlsql.params()

# User-supplied training options are stored under the "fitParam" key.
fitParams = p["fitParam"]

# Clear the default graph before building the model. The parentheses are
# required: a bare `tf.reset_default_graph` only names the function and
# resets nothing.
tf.reset_default_graph()
config = tf.ConfigProto()

# -1 is the default used when an option is not provided.
gpuPercent = float(mlsql.get_param(fitParams, "gpuPercent", -1))
featureSize = int(mlsql.get_param(fitParams, "featureSize", -1))
label_size = int(mlsql.get_param(fitParams, "labelSize", -1))
# "layerGroup" is a comma-separated list of integers, e.g. "300,100".
layer_group = [int(i) for i in mlsql.get_param(fitParams, "layerGroup", "300").split(",")]

batch_size = int(mlsql.get_param(fitParams, "batchSize", 32))
epochs = int(mlsql.get_param(fitParams, "epochs", 1))
print_interval = int(mlsql.get_param(fitParams, "printInterval", 1))

# Names of the feature and label columns in each input record.
input_col = mlsql.get_param(fitParams, "inputCol", "features")
label_col = mlsql.get_param(fitParams, "labelCol", "label")
tempModelLocalPath = p["internalSystemParam"]["tempModelLocalPath"]

# Both sizes default to -1, so a negative value means the option is missing.
if featureSize < 0 or label_size < 0:
    raise RuntimeError("featureSize or labelSize is required")

if gpuPercent > 0:
import tensorflow as tf
import mlsql_model
import mlsql
import sys
import mlsql_tf

# Data reader and the full parameter dictionary supplied by MLSQL.
rd = mlsql.read_data()
p = mlsql.params()

# User-supplied training options are stored under the "fitParam" key.
fitParams = p["fitParam"]

# Clear the default graph before building the model. The parentheses are
# required: a bare `tf.reset_default_graph` only names the function and
# resets nothing.
tf.reset_default_graph()
config = tf.ConfigProto()

# -1 is the default used when an option is not provided.
gpuPercent = float(mlsql.get_param(fitParams, "gpuPercent", -1))
featureSize = int(mlsql.get_param(fitParams, "featureSize", -1))
wordEmbeddingSize = int(mlsql.get_param(fitParams, "wordEmbeddingSize", -1))
# Floor division keeps sequenceLen an int on Python 3 as well; plain `/`
# would produce a float there. On Python 2 ints the result is unchanged.
# NOTE(review): with the -1 default for wordEmbeddingSize this evaluates to
# -featureSize -- confirm the option is validated before sequenceLen is used.
sequenceLen = featureSize // wordEmbeddingSize

label_size = int(mlsql.get_param(fitParams, "labelSize", -1))
# "layerGroup" is a comma-separated list of integers, e.g. "300,100".
layer_group = [
    int(i) for i in mlsql.get_param(fitParams, "layerGroup", "300").split(",")
]

print_interval = int(mlsql.get_param(fitParams, "printInterval", 1))

# "windowGroup" is a comma-separated list of integers; default "5,10,15".
window_group = [
    int(i)
    for i in mlsql.get_param(fitParams, "windowGroup", "5,10,15").split(",")
]