def __init__(self):
    """Create the job configuration and an MLConf instance for this object."""
    # Configuration is created first, then the MLConf instance.
    self.conf = Configuration()
    self.MLConf = MLConf()
class SGDLRLocalExample(object):
    """SGD logistic regression example for a local Angel deployment.

    Every action method (train_on_local_cluster, inc_train, predict) calls
    set_conf() first, then adds the data, model and log paths (all located
    under the system temporary directory) and hands self.conf to LRRunner.
    """

    def __init__(self):
        """Create the Configuration object the action methods fill in."""
        self.conf = Configuration()

    def set_conf(self):
        """Write deploy mode, resources and SGD-LR parameters to self.conf."""
        # Feature number of train data
        feature_num = 124
        # Total iteration number
        epoch_num = 20
        # Validation sample ratio
        v_ratio = 0.1
        # Data format, libsvm or dummy
        data_fmt = "libsvm"
        # Value stored under MLConf.ML_BATCH_SAMPLE_Ratio
        sp_ratio = 1.0
        # Learning rate
        learn_rate = 1.0
        # Decay of learning rate
        decay = 0.1
        # Regularization coefficient
        reg = 0.2

        # Set local deploy mode
        self.conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL")

        # Set basic configuration keys
        self.conf.set_boolean("mapred.mapper.new-api", True)
        self.conf.set(
            AngelConf.ANGEL_INPUTFORMAT_CLASS,
            'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat')
        self.conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, True)

        # Set data format
        self.conf.set(MLConf.ML_DATAFORMAT, data_fmt)

        # Set angel resource parameters #worker, #task, #PS
        self.conf.set_int(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_PS_NUMBER, 1)

        # Set SGD LR algorithm parameters #feature #epoch
        self.conf.set(MLConf.ML_FEATURE_NUM, str(feature_num))
        self.conf.set(MLConf.ML_EPOCH_NUM, str(epoch_num))
        self.conf.set(MLConf.ML_BATCH_SAMPLE_Ratio, str(sp_ratio))
        self.conf.set(MLConf.ML_VALIDATE_RATIO, str(v_ratio))
        self.conf.set(MLConf.ML_LEARN_RATE, str(learn_rate))
        self.conf.set(MLConf.ML_LEARN_DECAY, str(decay))
        self.conf.set(MLConf.ML_REG_L2, str(reg))

    def train_on_local_cluster(self):
        """Configure a training job and run it through LRRunner.train."""
        self.set_conf()
        input_path = "../data/exampledata/LRLocalExampleData/a9a.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        save_path = LOCAL_FS + TMP_PATH + "/model"
        log_path = LOCAL_FS + TMP_PATH + "/log"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, save_path)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        # Set actionType train
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)

        runner = LRRunner()
        runner.train(self.conf)

    def inc_train(self):
        """Configure an incremental training job and run LRRunner.inc_train.

        The model is loaded from "<tmp>/model" and the updated model is
        written to "<tmp>/newmodel".
        """
        self.set_conf()
        input_path = "../data/exampledata/LRLocalExampleData/a9a.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        load_path = LOCAL_FS + TMP_PATH + "/model"
        save_path = LOCAL_FS + TMP_PATH + "/newmodel"
        log_path = LOCAL_FS + TMP_PATH + "/log"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set load model path
        self.conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH, load_path)
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, save_path)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        # Set actionType incremental train
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_INC_TRAIN)

        runner = LRRunner()
        runner.inc_train(self.conf)

    def predict(self):
        """Configure a prediction job and run it through LRRunner.predict.

        The model is loaded from "<tmp>/model" and the prediction result is
        written to "<tmp>/predict".
        """
        self.set_conf()
        input_path = "../data/exampledata/LRLocalExampleData/a9a.test"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        # Same temporary directory lookup as the training methods, so the
        # model written by train_on_local_cluster is found again here.
        TMP_PATH = tempfile.gettempdir()
        load_path = LOCAL_FS + TMP_PATH + "/model"
        log_path = LOCAL_FS + TMP_PATH + "/log"
        predict_path = LOCAL_FS + TMP_PATH + "/predict"

        # Set input data path.
        # NOTE(review): the test data is registered under the *train* data
        # key; confirm whether AngelConf.ANGEL_PREDICT_DATA_PATH is expected.
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set load model path
        self.conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH, load_path)
        # Set predict result path
        self.conf.set(AngelConf.ANGEL_PREDICT_PATH, predict_path)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        # Set actionType prediction (a constant, like the other action types)
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_PREDICT)

        runner = LRRunner()
        runner.predict(self.conf)
def __init__(self):
    """Create the job configuration held by this object."""
    self.conf = Configuration()
""" An interactive shell. This file is designed to be launched as a PYTHONSTARTUP script. """ import platform import tempfile from hadoop.local_fs import LocalFileSystem from pyangel.context import Configuration from pyangel.ml.conf import MLConf from pyangel.conf import AngelConf try: Configuration._init() except RuntimeError: print("Oops!PyAngel failed to initialize") # To Do # Modify the way to get current Angel version conf = Configuration() conf[AngelConf.ANGEL_INPUTFORMAT_CLASS] = 'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat' conf.set_boolean("mapred.mapper.new-api", True) conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, True) LOCAL_FS = LocalFileSystem.DEFAULT_FS TMP_PATH = tempfile.gettempdir()
class KmeansLocalExample(object):
    """KMeans clustering example for a local Angel deployment.

    train() and predict_onLocal_cluster() both call set_conf() first, run the
    job through KMeansRunner and then stop the Angel client obtained from
    AngelClientFactory.
    """

    def __init__(self):
        """Create the job configuration and keep an MLConf instance."""
        self.conf = Configuration()
        self.MLConf = MLConf()

    def set_conf(self):
        """Write deploy mode, resources and KMeans parameters to self.conf."""
        data_fmt = "libsvm"
        # Cluster center number
        center_num = 3
        # Feature number of train data
        feature_num = 4
        # Total iteration number
        epoch_num = 20
        # Sample ratio per mini-batch
        sample_ratio = 1.0
        # Value stored under MLConf.kMEANS_C
        c = 0.15

        # Set local deploy mode
        self.conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL")

        # Set basic configuration keys
        self.conf.set_boolean("mapred.mapper.new-api", True)
        self.conf.set(
            AngelConf.ANGEL_INPUTFORMAT_CLASS,
            'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat')
        self.conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, True)

        # Set angel resource parameters #worker, #task, #PS
        self.conf.set_int(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_PS_NUMBER, 1)

        # Set KMeans algorithm parameters #cluster #feature #epoch
        self.conf.set(MLConf.KMEANS_CENTER_NUM, str(center_num))
        self.conf.set(MLConf.ML_FEATURE_NUM, str(feature_num))
        self.conf.set(MLConf.ML_EPOCH_NUM, str(epoch_num))
        self.conf.set(MLConf.KMEANS_SAMPLE_RATIO_PERBATCH, str(sample_ratio))
        self.conf.set(MLConf.kMEANS_C, str(c))

        # Set data format
        self.conf.set(MLConf.ML_DATAFORMAT, data_fmt)

    def train(self):
        """Configure a training job, run it and stop the Angel client."""
        self.set_conf()
        input_path = "data/exampledata/clusteringLocalExampleData/iris"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH,
                      LOCAL_FS + TMP_PATH + "/model")
        # Set log save path
        self.conf.set(AngelConf.ANGEL_LOG_PATH,
                      LOCAL_FS + TMP_PATH + "/kmeansLog/log")
        # Set actionType train
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)

        runner = KMeansRunner()
        runner.train(self.conf)

        angel_client = AngelClientFactory.get(self.conf)
        angel_client.stop()

    def predict_onLocal_cluster(self):
        """Configure a prediction job, run it and stop the Angel client.

        The model is loaded from "<tmp>/model" (where train() saves it) and
        the prediction result is written to "<tmp>/predict".
        """
        self.set_conf()
        # Prediction runs on the same example data set that train() uses.
        input_path = "data/exampledata/clusteringLocalExampleData/iris"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()

        # Set predict data path
        self.conf.set(AngelConf.ANGEL_PREDICT_DATA_PATH, input_path)
        # Set load model path
        self.conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH,
                      LOCAL_FS + TMP_PATH + "/model")
        # Set predict result path
        self.conf.set(AngelConf.ANGEL_PREDICT_PATH,
                      LOCAL_FS + TMP_PATH + "/predict")
        # Set actionType prediction
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_PREDICT)

        runner = KMeansRunner()
        runner.predict(self.conf)

        angel_client = AngelClientFactory.get(self.conf)
        angel_client.stop()
class FMLocalExample(object):
    """Factorization machine example for a local Angel deployment.

    Both action methods call set_conf() first, run the job through FMRunner
    and then stop the Angel client obtained from AngelClientFactory.
    """

    def __init__(self):
        """Create the Configuration object the action methods fill in."""
        self.conf = Configuration()

    def set_conf(self):
        """Write deploy mode, resources and FM parameters to self.conf."""
        # Feature number of train data
        feature_num = 236
        # Total iteration number
        epoch_num = 20
        # Rank
        rank = 5
        # Regularization parameters
        reg0 = 0.0
        reg1 = 0.0
        reg2 = 0.001
        # Learning rate
        lr = 0.001
        # Value stored under MLConf.ML_FM_V_STDDEV
        stev = 0.1

        # Set local deploy mode
        self.conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL")

        # Set basic configuration keys
        self.conf.set_boolean("mapred.mapper.new-api", True)
        self.conf.set(
            AngelConf.ANGEL_INPUTFORMAT_CLASS,
            'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat')
        self.conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, True)

        # Set angel resource parameters #worker, #task, #PS
        self.conf.set_int(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_PS_NUMBER, 1)

        # Set FM algorithm parameters #feature #epoch
        self.conf.set(MLConf.ML_FEATURE_NUM, str(feature_num))
        self.conf.set(MLConf.ML_EPOCH_NUM, str(epoch_num))
        self.conf.set(MLConf.ML_FM_RANK, str(rank))
        self.conf.set(MLConf.ML_LEARN_RATE, str(lr))
        self.conf.set(MLConf.ML_FM_REG0, str(reg0))
        self.conf.set(MLConf.ML_FM_REG1, str(reg1))
        self.conf.set(MLConf.ML_FM_REG2, str(reg2))
        self.conf.set(MLConf.ML_FM_V_STDDEV, str(stev))

    def train_on_local_cluster(self):
        """Configure a training job, run it and stop the Angel client."""
        self.set_conf()
        input_path = "./src/test/data/fm/food_fm_libsvm"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        save_path = LOCAL_FS + TMP_PATH + "/model"
        log_path = LOCAL_FS + TMP_PATH + "/LRlog"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, save_path)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        # Set actionType train (a constant, as in fm_classification)
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)

        runner = FMRunner()
        runner.train(self.conf)

        angel_client = AngelClientFactory.get(self.conf)
        angel_client.stop()

    def fm_classification(self):
        """Train an FM model with learn type "c" on the a9a data set.

        set_conf() is applied first; the feature number it writes is then
        overridden with 124 for this data set.
        """
        # Base settings (deploy mode, resources, FM defaults) must be in
        # place even when this method is the first one called.
        self.set_conf()
        input_path = "./src/test/data/fm/a9a.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        save_path = LOCAL_FS + TMP_PATH + "/model"
        log_path = LOCAL_FS + TMP_PATH + "/LRlog"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, save_path)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        # Set actionType train
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)
        # Set learnType
        self.conf.set(MLConf.ML_FM_LEARN_TYPE, "c")
        # Set feature number
        self.conf.set(MLConf.ML_FEATURE_NUM, str(124))

        runner = FMRunner()
        runner.train(self.conf)

        angel_client = AngelClientFactory.get(self.conf)
        angel_client.stop()
class LinearRegLocalExample(object):
    """
    Linear Regression Example used for user test, similar to
    "com.tencent.angel.example.ml.LinearRegLocalExample".

    Every action method calls set_conf() first and then runs the job through
    LinearRegRunner.
    """

    def __init__(self):
        """Create the Configuration object the action methods fill in."""
        self.conf = Configuration()

    def set_conf(self):
        """
        Set up configuration for runtime environment.
        """
        # Feature number of train data
        feature_num = 101
        # Total iteration number
        epoch_num = 20
        # Validation sample ratio
        v_ratio = 0.5
        # Data format, libsvm or dummy
        data_fmt = 'libsvm'
        # Value stored under MLConf.ML_BATCH_SAMPLE_Ratio
        sp_ratio = 1
        # Learning rate
        learn_rate = 0.1
        # Decay of learning rate
        decay = 0.01
        # Regularization coefficient
        reg = 0

        # Set job queue, if you use YARN deploy mode, you can set job queue by
        # self.conf.set('mapreduce.job.queue.name', 'default')

        # Set local deploy mode
        self.conf.set(AngelConf.ANGEL_DEPLOY_MODE, 'LOCAL')

        # Set basic configuration keys
        self.conf.set_boolean('mapred.mapper.new-api', True)
        self.conf.set(
            AngelConf.ANGEL_INPUTFORMAT_CLASS,
            'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat')
        self.conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, True)

        # Set data format
        self.conf.set(MLConf.ML_DATAFORMAT, data_fmt)

        # Set angel resource parameters #worker, #task, #ps
        self.conf.set_int(AngelConf.ANGEL_WORKERGROUP_NUMBER, 2)
        self.conf.set_int(AngelConf.ANGEL_WORKER_TASK_NUMBER, 10)
        self.conf.set_int(AngelConf.ANGEL_PS_NUMBER, 2)

        # Set SGD algorithm parameters #feature #epoch
        self.conf.set(MLConf.ML_FEATURE_NUM, str(feature_num))
        self.conf.set(MLConf.ML_EPOCH_NUM, str(epoch_num))
        self.conf.set(MLConf.ML_BATCH_SAMPLE_Ratio, str(sp_ratio))
        self.conf.set(MLConf.ML_VALIDATE_RATIO, str(v_ratio))
        self.conf.set(MLConf.ML_LEARN_RATE, str(learn_rate))
        self.conf.set(MLConf.ML_LEARN_DECAY, str(decay))
        self.conf.set(MLConf.ML_REG_L2, str(reg))

    def train_on_local_cluster(self):
        """
        Train model on local cluster
        """
        self.set_conf()
        input_path = '../data/exampledata/LinearRegression'
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        # Same relative log directory that inc_train() uses.
        log_path = "./src/test/log"
        model_path = 'file:///tmp/angel/model'

        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, model_path)
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)
        self.conf.set("fs.defaultFS", LOCAL_FS + TMP_PATH)

        runner = LinearRegRunner()
        runner.train(self.conf)

    def inc_train(self):
        """Configure an incremental training job and run it.

        The model is loaded from "<tmp>/model" and the updated model is
        written to "<tmp>/newmodel".
        """
        self.set_conf()
        input_path = "../data/exampledata/LinearRegression/LinearReg100.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        log_path = "./src/test/log"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set load model path
        self.conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH,
                      LOCAL_FS + TMP_PATH + "/model")
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH,
                      LOCAL_FS + TMP_PATH + "/newmodel")
        # Set actionType incremental train
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_INC_TRAIN)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)

        runner = LinearRegRunner()
        # NOTE(review): assumes the runner exposes the snake_case inc_train
        # used with the other PyAngel runners - confirm against LinearRegRunner.
        runner.inc_train(self.conf)

    def predict(self):
        """Configure a prediction job and run it through LinearRegRunner.

        The model is loaded from "<tmp>/model" and the prediction result is
        written to "<tmp>/predict".
        """
        self.set_conf()
        input_path = "../data/exampledata/LinearRegression/LinearReg100.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()

        # Set input data path (registered under the train data key)
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set load model path
        self.conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH,
                      LOCAL_FS + TMP_PATH + "/model")
        # Set predict result path
        self.conf.set(AngelConf.ANGEL_PREDICT_PATH,
                      LOCAL_FS + TMP_PATH + "/predict")
        # Set actionType prediction
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_PREDICT)

        runner = LinearRegRunner()
        runner.predict(self.conf)
class MFLocalExample(object):
    """Matrix factorization example for a local Angel deployment."""

    def __init__(self):
        """Create the job configuration and keep an MLConf instance."""
        self.conf = Configuration()
        self.MLConf = MLConf()

    def set_conf(self):
        """Fill self.conf with deploy, resource, path and MF settings."""
        job_conf = self.conf
        train_data = "../../data/exampledata/MFLocalExampleData"

        # Run in local deploy mode
        job_conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL")

        # Basic configuration keys
        job_conf.set_boolean("mapred.mapper.new-api", True)
        job_conf.set(
            AngelConf.ANGEL_INPUTFORMAT_CLASS,
            'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat')
        job_conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, True)

        # Angel resources: one worker group, one task per worker, one PS
        resource_keys = (
            AngelConf.ANGEL_WORKERGROUP_NUMBER,
            AngelConf.ANGEL_WORKER_TASK_NUMBER,
            AngelConf.ANGEL_PS_NUMBER,
        )
        for resource_key in resource_keys:
            job_conf.set_int(resource_key, 1)

        # Model and log output live under the system temporary directory
        tmp_root = LocalFileSystem.DEFAULT_FS + tempfile.gettempdir()

        # Training data, model output and log locations
        job_conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, train_data)
        job_conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, tmp_root + "/model")
        job_conf.set(AngelConf.ANGEL_LOG_PATH, tmp_root + "/log")

        # Action type: train
        job_conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)

        # MF algorithm parameters, applied in the listed order
        mf_settings = (
            (MLConf.ML_MF_RANK, "200"),
            (MLConf.ML_EPOCH_NUM, "8"),
            (MLConf.ML_MF_ROW_BATCH_NUM, "2"),
            (MLConf.ML_MF_ITEM_NUM, "1683"),
            (MLConf.ML_MF_LAMBDA, "0.01"),
            (MLConf.ML_MF_ETA, "0.0054"),
        )
        for setting_key, setting_value in mf_settings:
            job_conf.set(setting_key, setting_value)

    def train(self):
        """Apply set_conf() and run MatrixFactorizationRunner.train."""
        self.set_conf()
        MatrixFactorizationRunner().train(self.conf)
class GBDTExample(object):
    """GBDT example for a local Angel deployment, run through GBDTRunner."""

    def __init__(self):
        """Create the Configuration object the action methods fill in."""
        self.conf = Configuration()

    @staticmethod
    def _categorical_features(feature_num=127, cardinality=2):
        """Return "index:cardinality" entries for every feature, comma-joined.

        With the defaults this yields "0:2,1:2,...,126:2".
        """
        return ",".join(
            "%d:%d" % (index, cardinality) for index in range(feature_num))

    def set_conf(self):
        """
        Write deploy mode, resources and GBDT parameters to self.conf.

        The data and model paths are set in train(). To use an absolute
        location, replace them with paths under your local Angel
        installation, e.g. if it is /home/angel/angel_1.3.0 the input path
        would be
        "file:///home/angel/angel_1.3.0/data/exampledata/GBDTLocalExampleData/agaricus.txt.train"
        and the output path could be
        "file:///home/angel/angel_1.3.0/data/output".
        :return:
        """
        # Feature number of train data
        feature_num = 127
        # Every feature index is listed with the value 2
        cate_feat = self._categorical_features(feature_num)

        params = {
            AngelConf.ANGEL_DEPLOY_MODE: 'LOCAL',
            'mapred.mapper.new-api': True,
            AngelConf.ANGEL_INPUTFORMAT_CLASS:
                'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat',
            AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST: True,
            AngelConf.ANGEL_WORKERGROUP_NUMBER: 1,
            AngelConf.ANGEL_WORKER_TASK_NUMBER: 1,
            AngelConf.ANGEL_PS_NUMBER: 1,
            MLConf.ML_DATA_FORMAT: 'libsvm',
            MLConf.ML_FEATURE_NUM: feature_num,
            MLConf.ML_FEATURE_NNZ: 25,
            MLConf.ML_GBDT_TREE_NUM: 2,
            MLConf.ML_GBDT_TREE_DEPTH: 2,
            MLConf.ML_GBDT_SPLIT_NUM: 10,
            MLConf.ML_GBDT_SAMPLE_RATIO: 1.0,
            MLConf.ML_LEARN_RATE: 0.01,
            MLConf.ML_GBDT_CATE_FEAT: cate_feat,
        }

        self.conf.update(params)

    def train(self):
        """Configure a training job and run it through GBDTRunner.train.

        The model is saved to "<tmp>/model" and logs go to "<tmp>/GBDTlog".
        """
        self.set_conf()

        local_fs = LocalFileSystem.DEFAULT_FS
        tmp_path = tempfile.gettempdir()
        save_path = local_fs + tmp_path + "/model"
        log_path = local_fs + tmp_path + "/GBDTlog"
        input_path = "data/exampledata/GBDTLocalExampleData/agaricus.txt.train"

        self.conf[AngelConf.ANGEL_TRAIN_DATA_PATH] = input_path
        # Set save model path
        self.conf[AngelConf.ANGEL_SAVE_MODEL_PATH] = save_path
        # Set log path
        self.conf[AngelConf.ANGEL_LOG_PATH] = log_path
        # Set actionType train
        self.conf[AngelConf.ANGEL_ACTION_TYPE] = MLConf.ANGEL_ML_TRAIN

        runner = GBDTRunner()
        runner.train(self.conf)

    def predict(self):
        """Configure the split feature/value paths and run GBDTRunner.predict."""
        self.set_conf()
        # Placeholder model locations under the temporary directory
        tmp_path = tempfile.gettempdir()
        self.conf["gbdt.split.feature"] = tmp_path + "/out/xxx"
        self.conf["gbdt.split.value"] = tmp_path + "/out/xxx"

        runner = GBDTRunner()
        runner.predict(self.conf)
class FMLocalExample(object):
    """Factorization machine example for a local Angel deployment.

    Both action methods call set_conf() first, run the job through FMRunner
    and then stop the Angel client obtained from AngelClientFactory.
    """

    def __init__(self):
        """Create the Configuration object the action methods fill in."""
        self.conf = Configuration()

    def set_conf(self):
        """
        Set parameter values of self.conf: deploy mode, resources and the
        FM algorithm parameters.
        """
        # Feature number of train data
        feature_num = 236
        # Total iteration number
        epoch_num = 20
        # Rank
        rank = 5
        # Regularization parameters
        reg0 = 0.0
        reg1 = 0.0
        reg2 = 0.001
        # Learning rate
        lr = 0.001
        # Value stored under MLConf.ML_FM_V_STDDEV
        stev = 0.1

        # Set local deploy mode
        self.conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL")

        # Set basic configuration keys
        self.conf.set_boolean("mapred.mapper.new-api", True)
        self.conf.set(
            AngelConf.ANGEL_INPUTFORMAT_CLASS,
            'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat')
        self.conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, True)

        # Set angel resource parameters #worker, #task, #PS
        self.conf.set_int(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_PS_NUMBER, 1)

        # Set FM algorithm parameters #feature #epoch
        self.conf.set(MLConf.ML_FEATURE_NUM, str(feature_num))
        self.conf.set(MLConf.ML_EPOCH_NUM, str(epoch_num))
        self.conf.set(MLConf.ML_FM_RANK, str(rank))
        self.conf.set(MLConf.ML_LEARN_RATE, str(lr))
        self.conf.set(MLConf.ML_FM_REG0, str(reg0))
        self.conf.set(MLConf.ML_FM_REG1, str(reg1))
        self.conf.set(MLConf.ML_FM_REG2, str(reg2))
        self.conf.set(MLConf.ML_FM_V_STDDEV, str(stev))

    def train_on_local_cluster(self):
        """Configure a training job, run it and stop the Angel client."""
        self.set_conf()
        input_path = "./src/test/data/fm/food_fm_libsvm"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        save_path = LOCAL_FS + TMP_PATH + "/model"
        log_path = LOCAL_FS + TMP_PATH + "/LRlog"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, save_path)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        # Set actionType train (a constant, as in fm_classification)
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)

        runner = FMRunner()
        runner.train(self.conf)

        angel_client = AngelClientFactory.get(self.conf)
        angel_client.stop()

    def fm_classification(self):
        """Train an FM model with learn type "c" on the a9a data set.

        set_conf() is applied first; the feature number it writes is then
        overridden with 124 for this data set.
        """
        # Base settings (deploy mode, resources, FM defaults) must be in
        # place even when this method is the first one called.
        self.set_conf()
        input_path = "./src/test/data/fm/a9a.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        save_path = LOCAL_FS + TMP_PATH + "/model"
        log_path = LOCAL_FS + TMP_PATH + "/LRlog"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, save_path)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        # Set actionType train
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)
        # Set learnType
        self.conf.set(MLConf.ML_FM_LEARN_TYPE, "c")
        # Set feature number
        self.conf.set(MLConf.ML_FEATURE_NUM, str(124))

        runner = FMRunner()
        runner.train(self.conf)

        angel_client = AngelClientFactory.get(self.conf)
        angel_client.stop()
class LinearRegLocalExample(object):
    """
    Linear Regression Example used for user test, similar to
    "com.tencent.angel.example.ml.LinearRegLocalExample".

    Every action method calls set_conf() first and then runs the job through
    LinearRegRunner.
    """

    def __init__(self):
        """Create the Configuration object the action methods fill in."""
        self.conf = Configuration()

    def set_conf(self):
        """
        Set up configuration for runtime environment.
        """
        # Feature number of train data
        feature_num = 101
        # Total iteration number
        epoch_num = 20
        # Validation sample ratio
        v_ratio = 0.5
        # Data format, libsvm or dummy
        data_fmt = 'libsvm'
        # Value stored under MLConf.ML_BATCH_SAMPLE_Ratio
        sp_ratio = 1
        # Learning rate
        learn_rate = 0.1
        # Decay of learning rate
        decay = 0.01
        # Regularization coefficient
        reg = 0

        # Set job queue, if you use YARN deploy mode, you can set job queue by
        # self.conf.set('mapreduce.job.queue.name', 'default')

        # Set local deploy mode
        self.conf.set(AngelConf.ANGEL_DEPLOY_MODE, 'LOCAL')

        # Set basic configuration keys
        self.conf.set_boolean('mapred.mapper.new-api', True)
        self.conf.set(
            AngelConf.ANGEL_INPUTFORMAT_CLASS,
            'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat')
        self.conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, True)

        # Set data format
        self.conf.set(MLConf.ML_DATAFORMAT, data_fmt)

        # Set angel resource parameters #worker, #task, #ps
        self.conf.set_int(AngelConf.ANGEL_WORKERGROUP_NUMBER, 2)
        self.conf.set_int(AngelConf.ANGEL_WORKER_TASK_NUMBER, 10)
        self.conf.set_int(AngelConf.ANGEL_PS_NUMBER, 2)

        # Set SGD algorithm parameters #feature #epoch
        self.conf.set(MLConf.ML_FEATURE_NUM, str(feature_num))
        self.conf.set(MLConf.ML_EPOCH_NUM, str(epoch_num))
        self.conf.set(MLConf.ML_BATCH_SAMPLE_Ratio, str(sp_ratio))
        self.conf.set(MLConf.ML_VALIDATE_RATIO, str(v_ratio))
        self.conf.set(MLConf.ML_LEARN_RATE, str(learn_rate))
        self.conf.set(MLConf.ML_LEARN_DECAY, str(decay))
        self.conf.set(MLConf.ML_REG_L2, str(reg))

    def train_on_local_cluster(self):
        """
        Train model on local cluster
        """
        self.set_conf()
        input_path = '../data/exampledata/LinearRegression'
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        # Same relative log directory that inc_train() uses.
        log_path = "./src/test/log"
        model_path = 'file:///tmp/angel/model'

        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, model_path)
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)
        self.conf.set("fs.defaultFS", LOCAL_FS + TMP_PATH)

        runner = LinearRegRunner()
        runner.train(self.conf)

    def inc_train(self):
        """Configure an incremental training job and run it.

        The model is loaded from "<tmp>/model" and the updated model is
        written to "<tmp>/newmodel".
        """
        self.set_conf()
        input_path = "../data/exampledata/LinearRegression/LinearReg100.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        log_path = "./src/test/log"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set load model path
        self.conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH,
                      LOCAL_FS + TMP_PATH + "/model")
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH,
                      LOCAL_FS + TMP_PATH + "/newmodel")
        # Set actionType incremental train
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_INC_TRAIN)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)

        runner = LinearRegRunner()
        # NOTE(review): assumes the runner exposes the snake_case inc_train
        # used with the other PyAngel runners - confirm against LinearRegRunner.
        runner.inc_train(self.conf)

    def predict(self):
        """Configure a prediction job and run it through LinearRegRunner.

        The model is loaded from "<tmp>/model" and the prediction result is
        written to "<tmp>/predict".
        """
        self.set_conf()
        input_path = "../data/exampledata/LinearRegression/LinearReg100.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()

        # Set input data path (registered under the train data key)
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set load model path
        self.conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH,
                      LOCAL_FS + TMP_PATH + "/model")
        # Set predict result path
        self.conf.set(AngelConf.ANGEL_PREDICT_PATH,
                      LOCAL_FS + TMP_PATH + "/predict")
        # Set actionType prediction
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_PREDICT)

        runner = LinearRegRunner()
        runner.predict(self.conf)
def __init__(self):
    """Create the job configuration and an MLConf instance for this object."""
    # Configuration is created first, then the MLConf instance.
    self.conf = Configuration()
    self.MLConf = MLConf()
class MFLocalExample(object):
    """Matrix factorization example for a local Angel deployment."""

    def __init__(self):
        """Create the job configuration and keep an MLConf instance."""
        self.conf = Configuration()
        self.MLConf = MLConf()

    def set_conf(self):
        """Fill self.conf with deploy, resource, path and MF settings."""
        job_conf = self.conf
        train_data = "../../data/exampledata/MFLocalExampleData"

        # Run in local deploy mode
        job_conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL")

        # Basic configuration keys
        job_conf.set_boolean("mapred.mapper.new-api", True)
        job_conf.set(
            AngelConf.ANGEL_INPUTFORMAT_CLASS,
            'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat')
        job_conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, True)

        # Angel resources: one worker group, one task per worker, one PS
        resource_keys = (
            AngelConf.ANGEL_WORKERGROUP_NUMBER,
            AngelConf.ANGEL_WORKER_TASK_NUMBER,
            AngelConf.ANGEL_PS_NUMBER,
        )
        for resource_key in resource_keys:
            job_conf.set_int(resource_key, 1)

        # Model and log output live under the system temporary directory
        tmp_root = LocalFileSystem.DEFAULT_FS + tempfile.gettempdir()

        # Training data, model output and log locations
        job_conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, train_data)
        job_conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, tmp_root + "/model")
        job_conf.set(AngelConf.ANGEL_LOG_PATH, tmp_root + "/log")

        # Action type: train
        job_conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)

        # MF algorithm parameters, applied in the listed order
        mf_settings = (
            (MLConf.ML_MF_RANK, "200"),
            (MLConf.ML_EPOCH_NUM, "8"),
            (MLConf.ML_MF_ROW_BATCH_NUM, "2"),
            (MLConf.ML_MF_ITEM_NUM, "1683"),
            (MLConf.ML_MF_LAMBDA, "0.01"),
            (MLConf.ML_MF_ETA, "0.0054"),
        )
        for setting_key, setting_value in mf_settings:
            job_conf.set(setting_key, setting_value)

    def train(self):
        """Apply set_conf() and run MatrixFactorizationRunner.train."""
        self.set_conf()
        MatrixFactorizationRunner().train(self.conf)
class GBDTExample(object):
    """GBDT example for a local Angel deployment, run through GBDTRunner."""

    def __init__(self):
        """Create the Configuration object the action methods fill in."""
        self.conf = Configuration()

    def set_conf(self):
        """
        Write deploy mode, resources and GBDT parameters to self.conf.

        The data and model paths are set in train(). To use an absolute
        location, replace them with paths under your local Angel
        installation, e.g. if it is /home/angel/angel_1.3.0 the input path
        would be
        "file:///home/angel/angel_1.3.0/data/exampledata/GBDTLocalExampleData/agaricus.txt.train"
        and the output path could be
        "file:///home/angel/angel_1.3.0/data/output".
        :return:
        """
        # Feature number of train data
        feature_num = 127
        # Number of nonzero features
        feature_nzz = 25
        # Tree number
        tree_num = 2
        # Tree depth
        tree_depth = 2
        # Split number
        split_num = 10
        # Feature sample ratio
        sample_ratio = 1.0
        # Data format
        data_fmt = "libsvm"
        # Learning rate
        learn_rate = 0.01

        input_format = \
            'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat'

        # Use local deploy mode and dummy data spliter
        self.conf[AngelConf.ANGEL_DEPLOY_MODE] = "LOCAL"
        self.conf['mapred.mapper.new-api'] = True
        self.conf[AngelConf.ANGEL_INPUTFORMAT_CLASS] = input_format
        self.conf[AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST] = True

        # Set angel resource parameters #worker, #task, #PS
        self.conf[AngelConf.ANGEL_WORKERGROUP_NUMBER] = 1
        self.conf[AngelConf.ANGEL_WORKER_TASK_NUMBER] = 1
        self.conf[AngelConf.ANGEL_PS_NUMBER] = 1

        # Set GBDT algorithm parameters
        self.conf[MLConf.ML_DATA_FORMAT] = data_fmt
        self.conf[MLConf.ML_FEATURE_NUM] = str(feature_num)
        self.conf[MLConf.ML_FEATURE_NNZ] = str(feature_nzz)
        self.conf[MLConf.ML_GBDT_TREE_NUM] = str(tree_num)
        self.conf[MLConf.ML_GBDT_TREE_DEPTH] = str(tree_depth)
        self.conf[MLConf.ML_GBDT_SPLIT_NUM] = str(split_num)
        self.conf[MLConf.ML_GBDT_SAMPLE_RATIO] = str(sample_ratio)
        self.conf[MLConf.ML_LEARN_RATE] = str(learn_rate)

        # NOTE(review): the same keys are passed once more in bulk, with
        # unconverted (non-string) values; presumably load() applies them on
        # top of the assignments above - confirm and keep only one variant.
        params = {
            AngelConf.ANGEL_DEPLOY_MODE: 'LOCAL',
            'mapred.mapper.new-api': True,
            AngelConf.ANGEL_INPUTFORMAT_CLASS: input_format,
            AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST: True,
            AngelConf.ANGEL_WORKERGROUP_NUMBER: 1,
            AngelConf.ANGEL_WORKER_TASK_NUMBER: 1,
            AngelConf.ANGEL_PS_NUMBER: 1,
            MLConf.ML_DATA_FORMAT: data_fmt,
            MLConf.ML_FEATURE_NUM: feature_num,
            MLConf.ML_FEATURE_NNZ: feature_nzz,
            MLConf.ML_GBDT_TREE_NUM: tree_num,
            MLConf.ML_GBDT_TREE_DEPTH: tree_depth,
            MLConf.ML_GBDT_SPLIT_NUM: split_num,
            MLConf.ML_GBDT_SAMPLE_RATIO: sample_ratio,
            MLConf.ML_LEARN_RATE: learn_rate,
        }

        self.conf.load(params)

    def train(self):
        """Configure a training job and run it through GBDTRunner.train.

        The model is saved to "<tmp>/model" and logs go to "<tmp>/GBDTlog".
        """
        self.set_conf()

        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        save_path = LOCAL_FS + TMP_PATH + "/model"
        log_path = LOCAL_FS + TMP_PATH + "/GBDTlog"
        input_path = "data/exampledata/GBDTLocalExampleData/agaricus.txt.train"

        self.conf[AngelConf.ANGEL_TRAIN_DATA_PATH] = input_path
        # Set save model path
        self.conf[AngelConf.ANGEL_SAVE_MODEL_PATH] = save_path
        # Set log path
        self.conf[AngelConf.ANGEL_LOG_PATH] = log_path
        # Set actionType train
        self.conf[AngelConf.ANGEL_ACTION_TYPE] = MLConf.ANGEL_ML_TRAIN

        runner = GBDTRunner()
        runner.train(self.conf)

    def predict(self):
        """Configure the split feature/value paths and run GBDTRunner.predict."""
        self.set_conf()
        # Placeholder model locations under the temporary directory
        tmp_path = tempfile.gettempdir()
        self.conf["gbdt.split.feature"] = tmp_path + "/out/xxx"
        self.conf["gbdt.split.value"] = tmp_path + "/out/xxx"

        runner = GBDTRunner()
        # Pass this example's own configuration, not a module-level name.
        runner.predict(self.conf)