class SGDLRLocalExample(object):
    """Example driver that configures and launches LRRunner jobs in LOCAL deploy mode.

    Every public job method first rebuilds the shared settings through
    ``set_conf`` and then adds the paths and the action type of that job.
    """

    def __init__(self):
        self.conf = Configuration()

    def set_conf(self):
        """Write the deploy, resource and algorithm settings into ``self.conf``."""
        # Feature number of train data
        feature_num = 124
        # Total iteration number
        epoch_num = 20
        # Validation sample ratio
        v_ratio = 0.1
        # Data format, libsvm or dummy
        data_fmt = "libsvm"
        # Batch sample ratio (stored under MLConf.ML_BATCH_SAMPLE_Ratio)
        sp_ratio = 1.0
        # Learning rate
        learn_rate = 1.0
        # Decay of learning rate
        decay = 0.1
        # Regularization coefficient
        reg = 0.2

        # Set local deploy mode
        self.conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL")

        # Set basic configuration keys
        self.conf.set_boolean("mapred.mapper.new-api", True)
        self.conf.set(
            AngelConf.ANGEL_INPUTFORMAT_CLASS,
            'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat')
        self.conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, True)

        # Set data format
        self.conf.set(MLConf.ML_DATAFORMAT, data_fmt)

        # Set angel resource parameters #worker, #task, #PS
        self.conf.set_int(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_PS_NUMBER, 1)

        # Set SGD LR algorithm parameters #feature #epoch
        self.conf.set(MLConf.ML_FEATURE_NUM, str(feature_num))
        self.conf.set(MLConf.ML_EPOCH_NUM, str(epoch_num))
        self.conf.set(MLConf.ML_BATCH_SAMPLE_Ratio, str(sp_ratio))
        self.conf.set(MLConf.ML_VALIDATE_RATIO, str(v_ratio))
        self.conf.set(MLConf.ML_LEARN_RATE, str(learn_rate))
        self.conf.set(MLConf.ML_LEARN_DECAY, str(decay))
        self.conf.set(MLConf.ML_REG_L2, str(reg))

    def train_on_local_cluster(self):
        """Configure a training job on the a9a training file and run it."""
        self.set_conf()
        input_path = "../data/exampledata/LRLocalExampleData/a9a.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        save_path = LOCAL_FS + TMP_PATH + "/model"
        log_path = LOCAL_FS + TMP_PATH + "/log"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, save_path)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        # Set actionType train
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)

        runner = LRRunner()
        runner.train(self.conf)

    def inc_train(self):
        """Configure an incremental-training job and run it.

        The model is loaded from ``<tmp>/model`` and the result is written to
        ``<tmp>/newmodel``.
        """
        self.set_conf()
        input_path = "../data/exampledata/LRLocalExampleData/a9a.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        load_path = LOCAL_FS + TMP_PATH + "/model"
        save_path = LOCAL_FS + TMP_PATH + "/newmodel"
        log_path = LOCAL_FS + TMP_PATH + "/log"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set load model path
        self.conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH, load_path)
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, save_path)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        # Set actionType incremental train
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_INC_TRAIN)

        runner = LRRunner()
        runner.inc_train(self.conf)

    def predict(self):
        """Configure a prediction job on the a9a test file and run it."""
        self.set_conf()
        input_path = "../data/exampledata/LRLocalExampleData/a9a.test"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        # Same temp-dir lookup as the training methods, so the load path below
        # matches the save path used by train_on_local_cluster().
        TMP_PATH = tempfile.gettempdir()
        load_path = LOCAL_FS + TMP_PATH + "/model"
        log_path = LOCAL_FS + TMP_PATH + "/log"
        predict_path = LOCAL_FS + TMP_PATH + "/predict"

        # Set input data path
        # NOTE(review): the input is stored under ANGEL_TRAIN_DATA_PATH; confirm
        # whether a dedicated predict-data key should be used here instead.
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set load model path
        self.conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH, load_path)
        # Set predict result path
        self.conf.set(AngelConf.ANGEL_PREDICT_PATH, predict_path)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        # Set actionType prediction (read as an attribute, like the other action types)
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_PREDICT)

        runner = LRRunner()
        runner.predict(self.conf)
class LinearRegLocalExample(object):
    """
    Linear Regression Example used for user test, similar to
    "com.tencent.angel.example.ml.LinearRegLocalExample".
    """
    # NOTE(review): a class with this same name is defined again later in this
    # file; the later definition is the one bound to the name after import.

    def __init__(self):
        self.conf = Configuration()

    def set_conf(self):
        """
        Set up self.conf for runtime environment.
        """
        # Feature number of train data
        feature_num = 101
        # Total iteration number
        epoch_num = 20
        # Validation sample ratio
        v_ratio = 0.5
        # Data format, libsvm or dummy
        data_fmt = 'libsvm'
        # Batch sample ratio (stored under MLConf.ML_BATCH_SAMPLE_Ratio)
        sp_ratio = 1
        # Learning rate
        learn_rate = 0.1
        # Decay of learning rate
        decay = 0.01
        # Regularization coefficient
        reg = 0

        # Set job queue, if you use YARN deploy mode, you can set job queue by
        # self.conf.set('mapreduce.job.queue.name', 'default')

        # Set local deploy mode
        self.conf.set(AngelConf.ANGEL_DEPLOY_MODE, 'LOCAL')

        # Set basic configuration keys
        self.conf.set_boolean('mapred.mapper.new-api', True)
        self.conf.set(
            AngelConf.ANGEL_INPUTFORMAT_CLASS,
            'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat')
        self.conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, True)

        # Set data format
        self.conf.set(MLConf.ML_DATAFORMAT, data_fmt)

        # Set angel resource parameters #worker, #task, #ps
        self.conf.set_int(AngelConf.ANGEL_WORKERGROUP_NUMBER, 2)
        self.conf.set_int(AngelConf.ANGEL_WORKER_TASK_NUMBER, 10)
        self.conf.set_int(AngelConf.ANGEL_PS_NUMBER, 2)

        # Set algorithm parameters #feature #epoch
        self.conf.set(MLConf.ML_FEATURE_NUM, str(feature_num))
        self.conf.set(MLConf.ML_EPOCH_NUM, str(epoch_num))
        self.conf.set(MLConf.ML_BATCH_SAMPLE_Ratio, str(sp_ratio))
        self.conf.set(MLConf.ML_VALIDATE_RATIO, str(v_ratio))
        self.conf.set(MLConf.ML_LEARN_RATE, str(learn_rate))
        self.conf.set(MLConf.ML_LEARN_DECAY, str(decay))
        self.conf.set(MLConf.ML_REG_L2, str(reg))

    def train_on_local_cluster(self):
        """
        Train model on local cluster
        """
        self.set_conf()
        input_path = '../data/exampledata/LinearRegression'
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        # Relative log directory; same value inc_train() uses.
        log_path = "./src/test/log"
        # NOTE(review): the model is saved to a fixed location, whereas
        # inc_train()/predict() load from LOCAL_FS + TMP_PATH + "/model".
        model_path = 'file:///tmp/angel/model'

        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, model_path)
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)
        self.conf.set("fs.defaultFS", LOCAL_FS + TMP_PATH)

        runner = LinearRegRunner()
        runner.train(self.conf)

    def inc_train(self):
        """Configure an incremental-training job and run it.

        The model is loaded from ``<tmp>/model`` and the result is written to
        ``<tmp>/newmodel``.
        """
        self.set_conf()
        input_path = "../data/exampledata/LinearRegression/LinearReg100.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        log_path = "./src/test/log"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set load model path
        self.conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH, LOCAL_FS + TMP_PATH + "/model")
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/newmodel")
        # Set actionType incremental train
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_INC_TRAIN)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)

        runner = LinearRegRunner()
        # snake_case entry point, as used for the LR runner elsewhere in this file
        runner.inc_train(self.conf)

    def predict(self):
        """Configure a prediction job and run it, loading the model from ``<tmp>/model``."""
        self.set_conf()
        input_path = "../data/exampledata/LinearRegression/LinearReg100.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()

        # Set input data path
        # NOTE(review): the input is stored under ANGEL_TRAIN_DATA_PATH; confirm
        # whether a dedicated predict-data key should be used here instead.
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set load model path
        self.conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH, LOCAL_FS + TMP_PATH + "/model")
        # Set predict result path
        self.conf.set(AngelConf.ANGEL_PREDICT_PATH, LOCAL_FS + TMP_PATH + "/predict")
        # Set actionType prediction
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_PREDICT)

        runner = LinearRegRunner()
        runner.predict(self.conf)
class KmeansLocalExample(object):
    """Example driver that configures and launches KMeansRunner jobs in LOCAL deploy mode."""

    def __init__(self):
        self.conf = Configuration()
        self.MLConf = MLConf()

    def set_conf(self):
        """Write the deploy, resource and KMeans algorithm settings into ``self.conf``."""
        # Data format
        data_fmt = "libsvm"
        # Cluster center number
        center_num = 3
        # Feature number of train data
        feature_num = 4
        # Total iteration number
        epoch_num = 20
        # Sample ratio per mini-batch
        sample_ratio = 1.0
        # C
        c = 0.15

        # Set local deploy mode
        self.conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL")

        # Set basic configuration keys
        self.conf.set_boolean("mapred.mapper.new-api", True)
        self.conf.set(
            AngelConf.ANGEL_INPUTFORMAT_CLASS,
            'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat')
        self.conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, True)

        # Set angel resource parameters #worker, #task, #PS
        self.conf.set_int(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_PS_NUMBER, 1)

        # Set Kmeans algorithm parameters #cluster #feature #epoch
        self.conf.set(MLConf.KMEANS_CENTER_NUM, str(center_num))
        self.conf.set(MLConf.ML_FEATURE_NUM, str(feature_num))
        self.conf.set(MLConf.ML_EPOCH_NUM, str(epoch_num))
        self.conf.set(MLConf.KMEANS_SAMPLE_RATIO_PERBATCH, str(sample_ratio))
        self.conf.set(MLConf.kMEANS_C, str(c))

        # Set data format
        self.conf.set(MLConf.ML_DATAFORMAT, data_fmt)

    def train(self):
        """Configure a training job on the iris example data, run it, then stop the client."""
        self.set_conf()
        input_path = "data/exampledata/clusteringLocalExampleData/iris"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/model")
        # Set log save path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, LOCAL_FS + TMP_PATH + "/kmeansLog/log")
        # Set actionType train
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)

        runner = KMeansRunner()
        runner.train(self.conf)

        angel_client = AngelClientFactory.get(self.conf)
        angel_client.stop()

    def predict_onLocal_cluster(self):
        """Configure a prediction job, run it, then stop the client.

        The model is loaded from ``<tmp>/model`` and results go to ``<tmp>/predict``.
        """
        self.set_conf()
        # The method previously used input_path without defining it.
        # NOTE(review): this reuses the dataset path from train(); confirm it is
        # the intended prediction input.
        input_path = "data/exampledata/clusteringLocalExampleData/iris"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()

        # Set predict data path
        self.conf.set(AngelConf.ANGEL_PREDICT_DATA_PATH, input_path)
        # Set load model path
        self.conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH, LOCAL_FS + TMP_PATH + "/model")
        # Set predict result path
        self.conf.set(AngelConf.ANGEL_PREDICT_PATH, LOCAL_FS + TMP_PATH + "/predict")
        # Set actionType prediction
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_PREDICT)

        runner = KMeansRunner()
        runner.predict(self.conf)

        angel_client = AngelClientFactory.get(self.conf)
        angel_client.stop()
class FMLocalExample(object):
    """Example driver that configures and launches FMRunner training jobs in LOCAL deploy mode."""
    # NOTE(review): a class with this same name is defined again later in this
    # file; the later definition is the one bound to the name after import.

    def __init__(self):
        self.conf = Configuration()

    def set_conf(self):
        """
        set parameter values of self.conf
        """
        # Feature number of train data
        feature_num = 236
        # Total iteration number
        epoch_num = 20
        # Rank
        rank = 5
        # Regularization parameters
        reg0 = 0.0
        reg1 = 0.0
        reg2 = 0.001
        # Learn rate
        lr = 0.001
        # Value stored under MLConf.ML_FM_V_STDDEV
        stev = 0.1

        # Set local deploy mode
        self.conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL")

        # Set basic configuration keys
        self.conf.set_boolean("mapred.mapper.new-api", True)
        self.conf.set(
            AngelConf.ANGEL_INPUTFORMAT_CLASS,
            'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat')
        self.conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, True)

        # Set angel resource parameters #worker, #task, #PS
        self.conf.set_int(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_PS_NUMBER, 1)

        # Set FM algorithm parameters #feature #epoch
        self.conf.set(MLConf.ML_FEATURE_NUM, str(feature_num))
        self.conf.set(MLConf.ML_EPOCH_NUM, str(epoch_num))
        self.conf.set(MLConf.ML_FM_RANK, str(rank))
        self.conf.set(MLConf.ML_LEARN_RATE, str(lr))
        self.conf.set(MLConf.ML_FM_REG0, str(reg0))
        self.conf.set(MLConf.ML_FM_REG1, str(reg1))
        self.conf.set(MLConf.ML_FM_REG2, str(reg2))
        self.conf.set(MLConf.ML_FM_V_STDDEV, str(stev))

    def train_on_local_cluster(self):
        """Configure a training job on the food_fm_libsvm data, run it, then stop the client."""
        self.set_conf()
        input_path = "./src/test/data/fm/food_fm_libsvm"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        save_path = LOCAL_FS + TMP_PATH + "/model"
        log_path = LOCAL_FS + TMP_PATH + "/LRlog"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, save_path)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        # Set actionType train (read as an attribute, as fm_classification does)
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)

        runner = FMRunner()
        runner.train(self.conf)

        angel_client = AngelClientFactory.get(self.conf)
        angel_client.stop()

    def fm_classification(self):
        """Configure a training job with learn type "c" on a9a.train, run it, then stop the client."""
        # NOTE(review): unlike train_on_local_cluster(), this method does not
        # call set_conf() first; confirm whether that is intentional.
        input_path = "./src/test/data/fm/a9a.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        save_path = LOCAL_FS + TMP_PATH + "/model"
        log_path = LOCAL_FS + TMP_PATH + "/LRlog"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, save_path)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        # Set actionType train
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)
        # Set learnType
        self.conf.set(MLConf.ML_FM_LEARN_TYPE, "c")
        # Set feature number
        self.conf.set(MLConf.ML_FEATURE_NUM, str(124))

        runner = FMRunner()
        runner.train(self.conf)

        angel_client = AngelClientFactory.get(self.conf)
        angel_client.stop()
class MFLocalExample(object):
    """Example driver that configures and launches a MatrixFactorizationRunner training job."""
    # NOTE(review): a class with this same name is defined again later in this
    # file; the later definition is the one bound to the name after import.

    def __init__(self):
        self.conf = Configuration()
        self.MLConf = MLConf()

    def set_conf(self):
        """Write deploy mode, resources, paths, action type and MF parameters into ``self.conf``."""
        data_dir = "../../data/exampledata/MFLocalExampleData"
        conf = self.conf

        # Local deploy mode
        conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL")

        # Basic configuration keys
        conf.set_boolean("mapred.mapper.new-api", True)
        conf.set(
            AngelConf.ANGEL_INPUTFORMAT_CLASS,
            'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat')
        conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, True)

        # Angel resource parameters: one worker group, one task, one PS
        conf.set_int(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1)
        conf.set_int(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1)
        conf.set_int(AngelConf.ANGEL_PS_NUMBER, 1)

        fs_prefix = LocalFileSystem.DEFAULT_FS
        tmp_dir = tempfile.gettempdir()
        model_path = fs_prefix + tmp_dir + "/model"
        log_path = fs_prefix + tmp_dir + "/log"

        # Training data, saved model and log locations
        conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, data_dir)
        conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, model_path)
        conf.set(AngelConf.ANGEL_LOG_PATH, log_path)

        # Action type: train
        conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)

        # MF algorithm parameters
        conf.set(MLConf.ML_MF_RANK, "200")
        conf.set(MLConf.ML_EPOCH_NUM, "8")
        conf.set(MLConf.ML_MF_ROW_BATCH_NUM, "2")
        conf.set(MLConf.ML_MF_ITEM_NUM, "1683")
        conf.set(MLConf.ML_MF_LAMBDA, "0.01")
        conf.set(MLConf.ML_MF_ETA, "0.0054")

    def train(self):
        """Build the configuration and hand it to a MatrixFactorizationRunner for training."""
        self.set_conf()
        MatrixFactorizationRunner().train(self.conf)
class FMLocalExample(object):
    """Example driver that configures and launches FMRunner training jobs in LOCAL deploy mode."""
    # NOTE(review): a class with this same name is also defined earlier in this
    # file; this later definition is the one bound to the name after import.

    def __init__(self):
        self.conf = Configuration()

    def set_conf(self):
        """
        set parameter values of self.conf
        """
        # Feature number of train data
        feature_num = 236
        # Total iteration number
        epoch_num = 20
        # Rank
        rank = 5
        # Regularization parameters
        reg0 = 0.0
        reg1 = 0.0
        reg2 = 0.001
        # Learn rate
        lr = 0.001
        # Value stored under MLConf.ML_FM_V_STDDEV
        stev = 0.1

        # Set local deploy mode
        self.conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL")

        # Set basic configuration keys
        self.conf.set_boolean("mapred.mapper.new-api", True)
        self.conf.set(
            AngelConf.ANGEL_INPUTFORMAT_CLASS,
            'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat')
        self.conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, True)

        # Set angel resource parameters #worker, #task, #PS
        self.conf.set_int(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_PS_NUMBER, 1)

        # Set FM algorithm parameters #feature #epoch
        self.conf.set(MLConf.ML_FEATURE_NUM, str(feature_num))
        self.conf.set(MLConf.ML_EPOCH_NUM, str(epoch_num))
        self.conf.set(MLConf.ML_FM_RANK, str(rank))
        self.conf.set(MLConf.ML_LEARN_RATE, str(lr))
        self.conf.set(MLConf.ML_FM_REG0, str(reg0))
        self.conf.set(MLConf.ML_FM_REG1, str(reg1))
        self.conf.set(MLConf.ML_FM_REG2, str(reg2))
        self.conf.set(MLConf.ML_FM_V_STDDEV, str(stev))

    def train_on_local_cluster(self):
        """Configure a training job on the food_fm_libsvm data, run it, then stop the client."""
        self.set_conf()
        input_path = "./src/test/data/fm/food_fm_libsvm"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        save_path = LOCAL_FS + TMP_PATH + "/model"
        log_path = LOCAL_FS + TMP_PATH + "/LRlog"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, save_path)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        # Set actionType train (read as an attribute, as fm_classification does)
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)

        runner = FMRunner()
        runner.train(self.conf)

        angel_client = AngelClientFactory.get(self.conf)
        angel_client.stop()

    def fm_classification(self):
        """Configure a training job with learn type "c" on a9a.train, run it, then stop the client."""
        # NOTE(review): unlike train_on_local_cluster(), this method does not
        # call set_conf() first; confirm whether that is intentional.
        input_path = "./src/test/data/fm/a9a.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        save_path = LOCAL_FS + TMP_PATH + "/model"
        log_path = LOCAL_FS + TMP_PATH + "/LRlog"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, save_path)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        # Set actionType train
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)
        # Set learnType
        self.conf.set(MLConf.ML_FM_LEARN_TYPE, "c")
        # Set feature number
        self.conf.set(MLConf.ML_FEATURE_NUM, str(124))

        runner = FMRunner()
        runner.train(self.conf)

        angel_client = AngelClientFactory.get(self.conf)
        angel_client.stop()
class LinearRegLocalExample(object):
    """
    Linear Regression Example used for user test, similar to
    "com.tencent.angel.example.ml.LinearRegLocalExample".
    """
    # NOTE(review): a class with this same name is also defined earlier in this
    # file; this later definition is the one bound to the name after import.

    def __init__(self):
        self.conf = Configuration()

    def set_conf(self):
        """
        Set up self.conf for runtime environment.
        """
        # Feature number of train data
        feature_num = 101
        # Total iteration number
        epoch_num = 20
        # Validation sample ratio
        v_ratio = 0.5
        # Data format, libsvm or dummy
        data_fmt = 'libsvm'
        # Batch sample ratio (stored under MLConf.ML_BATCH_SAMPLE_Ratio)
        sp_ratio = 1
        # Learning rate
        learn_rate = 0.1
        # Decay of learning rate
        decay = 0.01
        # Regularization coefficient
        reg = 0

        # Set job queue, if you use YARN deploy mode, you can set job queue by
        # self.conf.set('mapreduce.job.queue.name', 'default')

        # Set local deploy mode
        self.conf.set(AngelConf.ANGEL_DEPLOY_MODE, 'LOCAL')

        # Set basic configuration keys
        self.conf.set_boolean('mapred.mapper.new-api', True)
        self.conf.set(
            AngelConf.ANGEL_INPUTFORMAT_CLASS,
            'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat')
        self.conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, True)

        # Set data format
        self.conf.set(MLConf.ML_DATAFORMAT, data_fmt)

        # Set angel resource parameters #worker, #task, #ps
        self.conf.set_int(AngelConf.ANGEL_WORKERGROUP_NUMBER, 2)
        self.conf.set_int(AngelConf.ANGEL_WORKER_TASK_NUMBER, 10)
        self.conf.set_int(AngelConf.ANGEL_PS_NUMBER, 2)

        # Set algorithm parameters #feature #epoch
        self.conf.set(MLConf.ML_FEATURE_NUM, str(feature_num))
        self.conf.set(MLConf.ML_EPOCH_NUM, str(epoch_num))
        self.conf.set(MLConf.ML_BATCH_SAMPLE_Ratio, str(sp_ratio))
        self.conf.set(MLConf.ML_VALIDATE_RATIO, str(v_ratio))
        self.conf.set(MLConf.ML_LEARN_RATE, str(learn_rate))
        self.conf.set(MLConf.ML_LEARN_DECAY, str(decay))
        self.conf.set(MLConf.ML_REG_L2, str(reg))

    def train_on_local_cluster(self):
        """
        Train model on local cluster
        """
        self.set_conf()
        input_path = '../data/exampledata/LinearRegression'
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        # Relative log directory; same value inc_train() uses.
        log_path = "./src/test/log"
        # NOTE(review): the model is saved to a fixed location, whereas
        # inc_train()/predict() load from LOCAL_FS + TMP_PATH + "/model".
        model_path = 'file:///tmp/angel/model'

        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, model_path)
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)
        self.conf.set("fs.defaultFS", LOCAL_FS + TMP_PATH)

        runner = LinearRegRunner()
        runner.train(self.conf)

    def inc_train(self):
        """Configure an incremental-training job and run it.

        The model is loaded from ``<tmp>/model`` and the result is written to
        ``<tmp>/newmodel``.
        """
        self.set_conf()
        input_path = "../data/exampledata/LinearRegression/LinearReg100.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        log_path = "./src/test/log"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set load model path
        self.conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH, LOCAL_FS + TMP_PATH + "/model")
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/newmodel")
        # Set actionType incremental train
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_INC_TRAIN)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)

        runner = LinearRegRunner()
        # snake_case entry point, as used for the LR runner elsewhere in this file
        runner.inc_train(self.conf)

    def predict(self):
        """Configure a prediction job and run it, loading the model from ``<tmp>/model``."""
        self.set_conf()
        input_path = "../data/exampledata/LinearRegression/LinearReg100.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()

        # Set input data path
        # NOTE(review): the input is stored under ANGEL_TRAIN_DATA_PATH; confirm
        # whether a dedicated predict-data key should be used here instead.
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set load model path
        self.conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH, LOCAL_FS + TMP_PATH + "/model")
        # Set predict result path
        self.conf.set(AngelConf.ANGEL_PREDICT_PATH, LOCAL_FS + TMP_PATH + "/predict")
        # Set actionType prediction
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_PREDICT)

        runner = LinearRegRunner()
        runner.predict(self.conf)
class MFLocalExample(object):
    """Example driver that configures and launches a MatrixFactorizationRunner training job."""
    # NOTE(review): a class with this same name is also defined earlier in this
    # file; this later definition is the one bound to the name after import.

    def __init__(self):
        self.conf = Configuration()
        self.MLConf = MLConf()

    def set_conf(self):
        """Write deploy mode, resources, paths, action type and MF parameters into ``self.conf``."""
        data_dir = "../../data/exampledata/MFLocalExampleData"
        conf = self.conf

        # Local deploy mode
        conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL")

        # Basic configuration keys
        conf.set_boolean("mapred.mapper.new-api", True)
        conf.set(
            AngelConf.ANGEL_INPUTFORMAT_CLASS,
            'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat')
        conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, True)

        # Angel resource parameters: one worker group, one task, one PS
        conf.set_int(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1)
        conf.set_int(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1)
        conf.set_int(AngelConf.ANGEL_PS_NUMBER, 1)

        fs_prefix = LocalFileSystem.DEFAULT_FS
        tmp_dir = tempfile.gettempdir()
        model_path = fs_prefix + tmp_dir + "/model"
        log_path = fs_prefix + tmp_dir + "/log"

        # Training data, saved model and log locations
        conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, data_dir)
        conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, model_path)
        conf.set(AngelConf.ANGEL_LOG_PATH, log_path)

        # Action type: train
        conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)

        # MF algorithm parameters
        conf.set(MLConf.ML_MF_RANK, "200")
        conf.set(MLConf.ML_EPOCH_NUM, "8")
        conf.set(MLConf.ML_MF_ROW_BATCH_NUM, "2")
        conf.set(MLConf.ML_MF_ITEM_NUM, "1683")
        conf.set(MLConf.ML_MF_LAMBDA, "0.01")
        conf.set(MLConf.ML_MF_ETA, "0.0054")

    def train(self):
        """Build the configuration and hand it to a MatrixFactorizationRunner for training."""
        self.set_conf()
        MatrixFactorizationRunner().train(self.conf)