Ejemplo n.º 1
0
class SGDLRLocalExample(object):
    """Local-mode example for SGD logistic regression driven by ``LRRunner``.

    Every entry point (``train_on_local_cluster``, ``inc_train``, ``predict``)
    calls ``set_conf`` first and then hands ``self.conf`` to an ``LRRunner``.
    """

    def __init__(self):
        self.conf = Configuration()

    def set_conf(self):
        """Fill ``self.conf`` with deploy, resource and algorithm settings."""

        # Feature number of train data
        feature_num = 124
        # Total iteration number
        epoch_num = 20
        # Validation sample ratio
        v_ratio = 0.1
        # Data format, libsvm or dummy
        data_fmt = "libsvm"
        # Batch sample ratio (stored under MLConf.ML_BATCH_SAMPLE_Ratio)
        sp_ratio = 1.0

        # Learning rate
        learn_rate = 1.0
        # Decay of learning rate
        decay = 0.1
        # Regularization coefficient
        reg = 0.2

        # Set local deploy mode
        self.conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL")

        # Set basic configuration keys
        self.conf.set_boolean("mapred.mapper.new-api", True)
        self.conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, 'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat')
        self.conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, True)

        # Set data format
        self.conf.set(MLConf.ML_DATAFORMAT, data_fmt)

        # Set angel resource parameters #worker, #task, #PS
        self.conf.set_int(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_PS_NUMBER, 1)

        # Set SGD LR algorithm parameters #feature #epoch
        self.conf.set(MLConf.ML_FEATURE_NUM, str(feature_num))
        self.conf.set(MLConf.ML_EPOCH_NUM, str(epoch_num))
        self.conf.set(MLConf.ML_BATCH_SAMPLE_Ratio, str(sp_ratio))
        self.conf.set(MLConf.ML_VALIDATE_RATIO, str(v_ratio))
        self.conf.set(MLConf.ML_LEARN_RATE, str(learn_rate))
        self.conf.set(MLConf.ML_LEARN_DECAY, str(decay))
        self.conf.set(MLConf.ML_REG_L2, str(reg))

    def train_on_local_cluster(self):
        """Train on the a9a training set and save the model under the temp dir."""
        self.set_conf()
        input_path = "../data/exampledata/LRLocalExampleData/a9a.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        save_path = LOCAL_FS + TMP_PATH + "/model"
        log_path = LOCAL_FS + TMP_PATH + "/log"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, save_path)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        # Set actionType train
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)

        runner = LRRunner()
        runner.train(self.conf)

    def inc_train(self):
        """Incrementally train from ``<tmp>/model`` and save to ``<tmp>/newmodel``."""
        self.set_conf()
        input_path = "../data/exampledata/LRLocalExampleData/a9a.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        load_path = LOCAL_FS + TMP_PATH + "/model"
        save_path = LOCAL_FS + TMP_PATH + "/newmodel"
        log_path = LOCAL_FS + TMP_PATH + "/log"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set load model path
        self.conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH, load_path)
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, save_path)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        # Set actionType incremental train
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_INC_TRAIN)

        runner = LRRunner()
        runner.inc_train(self.conf)

    def predict(self):
        """Predict on the a9a test set using the model saved under the temp dir."""
        self.set_conf()
        input_path = "../data/exampledata/LRLocalExampleData/a9a.test"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        # Resolve the temp dir the same way the training entry points do, so
        # the model written by training is the one loaded here.
        TMP_PATH = tempfile.gettempdir()
        load_path = LOCAL_FS + TMP_PATH + "/model"
        log_path = LOCAL_FS + TMP_PATH + "/log"
        predict_path = LOCAL_FS + TMP_PATH + "/predict"

        # Set input data path
        # NOTE(review): the test data is registered under the TRAIN data path
        # key; confirm whether AngelConf.ANGEL_PREDICT_DATA_PATH is expected.
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set load model path
        self.conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH, load_path)
        # Set predict result path
        self.conf.set(AngelConf.ANGEL_PREDICT_PATH, predict_path)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        # Set actionType prediction (a constant, like the other action types)
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_PREDICT)

        runner = LRRunner()
        runner.predict(self.conf)
Ejemplo n.º 2
0
class LinearRegLocalExample(object):
    """
    Linear Regression Example used for user test, similar to "com.tencent.angel.example.ml.LinearRegLocalExample".

    Every entry point calls ``set_conf`` first and then hands ``self.conf``
    to a ``LinearRegRunner``.
    """

    def __init__(self):
        self.conf = Configuration()

    def set_conf(self):
        """
        Set up self.conf for the runtime environment.
        """
        # Feature number of train data
        feature_num = 101
        # Total iteration number
        epoch_num = 20
        # Validation sample ratio
        v_ratio = 0.5
        # Data format, libsvm or dummy
        data_fmt = 'libsvm'
        # Batch sample ratio (stored under MLConf.ML_BATCH_SAMPLE_Ratio)
        sp_ratio = 1

        # Learning rate
        learn_rate = 0.1
        # Decay of learning rate
        decay = 0.01
        # Regularization coefficient
        reg = 0

        # Set job queue, if you use YARN deploy mode, you can set job queue by
        # self.conf.set('mapreduce.job.queue.name', 'default')

        # Set local deploy mode
        self.conf.set(AngelConf.ANGEL_DEPLOY_MODE, 'LOCAL')

        # Set basic configuration keys
        self.conf.set_boolean('mapred.mapper.new-api', True)
        self.conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, 'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat')
        self.conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, True)

        # Set data format
        self.conf.set(MLConf.ML_DATAFORMAT, data_fmt)

        # Set angel resource parameters #worker, #task, #PS
        self.conf.set_int(AngelConf.ANGEL_WORKERGROUP_NUMBER, 2)
        self.conf.set_int(AngelConf.ANGEL_WORKER_TASK_NUMBER, 10)
        self.conf.set_int(AngelConf.ANGEL_PS_NUMBER, 2)

        # Set algorithm parameters #feature #epoch
        self.conf.set(MLConf.ML_FEATURE_NUM, str(feature_num))
        self.conf.set(MLConf.ML_EPOCH_NUM, str(epoch_num))
        self.conf.set(MLConf.ML_BATCH_SAMPLE_Ratio, str(sp_ratio))
        self.conf.set(MLConf.ML_VALIDATE_RATIO, str(v_ratio))
        self.conf.set(MLConf.ML_LEARN_RATE, str(learn_rate))
        self.conf.set(MLConf.ML_LEARN_DECAY, str(decay))
        self.conf.set(MLConf.ML_REG_L2, str(reg))

    def train_on_local_cluster(self):
        """
        Train model on local cluster
        """
        self.set_conf()
        input_path = '../data/exampledata/LinearRegression'
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        # Same relative log directory that inc_train uses.
        log_path = "./src/test/log"
        # NOTE(review): the model is saved here, while inc_train/predict load
        # from LOCAL_FS + TMP_PATH + "/model" -- confirm these are meant to match.
        model_path = 'file:///tmp/angel/model'

        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, model_path)
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)
        self.conf.set("fs.defaultFS", LOCAL_FS + TMP_PATH)

        runner = LinearRegRunner()
        runner.train(self.conf)

    def inc_train(self):
        """Incrementally train from ``<tmp>/model`` and save to ``<tmp>/newmodel``."""
        self.set_conf()
        input_path = "../data/exampledata/LinearRegression/LinearReg100.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        log_path = "./src/test/log"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set load model path
        self.conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH, LOCAL_FS + TMP_PATH + "/model")
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, LOCAL_FS + TMP_PATH + "/newmodel")
        # Set actionType incremental train (a constant, like ANGEL_ML_TRAIN)
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_INC_TRAIN)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)

        runner = LinearRegRunner()
        # NOTE(review): assumes LinearRegRunner exposes snake_case inc_train,
        # as the Python runner API does for LRRunner -- confirm.
        runner.inc_train(self.conf)

    def predict(self):
        """Predict with the model under ``<tmp>/model``; results go to ``<tmp>/predict``."""
        self.set_conf()
        input_path = "../data/exampledata/LinearRegression/LinearReg100.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()

        # Set input data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set load model path
        self.conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH, LOCAL_FS + TMP_PATH + "/model")
        # Set predict result path
        self.conf.set(AngelConf.ANGEL_PREDICT_PATH, LOCAL_FS + TMP_PATH + "/predict")
        # Set actionType prediction (a constant, like ANGEL_ML_TRAIN)
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_PREDICT)

        runner = LinearRegRunner()
        runner.predict(self.conf)
Ejemplo n.º 3
0
class KmeansLocalExample(object):
    """Local-mode KMeans example driven by ``KMeansRunner``.

    ``train`` and ``predict_onLocal_cluster`` both call ``set_conf`` first,
    run the job, then fetch the Angel client for the configuration and stop it.
    """

    def __init__(self):
        self.conf = Configuration()
        self.MLConf = MLConf()

    def set_conf(self):
        """Fill ``self.conf`` with deploy, resource and KMeans settings."""
        data_fmt = "libsvm"

        # Cluster center number
        center_num = 3
        # Feature number of train data
        feature_num = 4
        # Total iteration number
        epoch_num = 20
        # Sample ratio per mini-batch
        sample_ratio = 1.0
        # Value stored under MLConf.KMEANS_C
        c = 0.15

        # Set local deploy mode
        self.conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL")

        # Set basic configuration keys
        self.conf.set_boolean("mapred.mapper.new-api", True)
        self.conf.set(
            AngelConf.ANGEL_INPUTFORMAT_CLASS,
            'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat')
        self.conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST,
                              True)

        # Set angel resource parameters #worker, #task, #PS
        self.conf.set_int(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_PS_NUMBER, 1)

        # Set Kmeans algorithm parameters #cluster #feature #epoch
        self.conf.set(MLConf.KMEANS_CENTER_NUM, str(center_num))
        self.conf.set(MLConf.ML_FEATURE_NUM, str(feature_num))
        self.conf.set(MLConf.ML_EPOCH_NUM, str(epoch_num))
        self.conf.set(MLConf.KMEANS_SAMPLE_RATIO_PERBATCH, str(sample_ratio))
        # Upper-case key name, consistent with the other KMEANS_* constants.
        self.conf.set(MLConf.KMEANS_C, str(c))

        # Set data format
        self.conf.set(MLConf.ML_DATAFORMAT, data_fmt)

    def train(self):
        """Train on the iris example data and save the model under the temp dir."""
        self.set_conf()
        input_path = "data/exampledata/clusteringLocalExampleData/iris"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH,
                      LOCAL_FS + TMP_PATH + "/model")
        # Set log save path
        self.conf.set(AngelConf.ANGEL_LOG_PATH,
                      LOCAL_FS + TMP_PATH + "/kmeansLog/log")
        # Set actionType train
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)

        runner = KMeansRunner()
        runner.train(self.conf)

        angel_client = AngelClientFactory.get(self.conf)
        angel_client.stop()

    def predict_onLocal_cluster(self):
        """Predict with the model under ``<tmp>/model``; results go to ``<tmp>/predict``."""
        self.set_conf()
        # NOTE(review): reuses the training data set as prediction input;
        # confirm this is the intended data for the example.
        input_path = "data/exampledata/clusteringLocalExampleData/iris"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        # Set predict data path
        self.conf.set(AngelConf.ANGEL_PREDICT_DATA_PATH, input_path)
        # Set load model path
        self.conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH,
                      LOCAL_FS + TMP_PATH + "/model")
        # Set predict result path
        self.conf.set(AngelConf.ANGEL_PREDICT_PATH,
                      LOCAL_FS + TMP_PATH + "/predict")
        # Set actionType prediction
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_PREDICT)

        runner = KMeansRunner()
        runner.predict(self.conf)

        angel_client = AngelClientFactory.get(self.conf)
        angel_client.stop()
Ejemplo n.º 4
0
class FMLocalExample(object):
    """Local-mode factorization machine example driven by ``FMRunner``.

    Both entry points call ``set_conf`` first, run a training job, then fetch
    the Angel client for the configuration and stop it.
    """

    def __init__(self):
        self.conf = Configuration()

    def set_conf(self):
        """
        Set parameter values of self.conf
        """

        # Feature number of train data
        feature_num = 236
        # Total iteration number
        epoch_num = 20
        # Rank
        rank = 5
        # Regularization parameters
        reg0 = 0.0
        reg1 = 0.0
        reg2 = 0.001
        # Learning rate
        lr = 0.001
        # Value stored under MLConf.ML_FM_V_STDDEV
        stev = 0.1

        # Set local deploy mode
        self.conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL")

        # Set basic configuration keys
        self.conf.set_boolean("mapred.mapper.new-api", True)
        self.conf.set(
            AngelConf.ANGEL_INPUTFORMAT_CLASS,
            'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat')
        self.conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST,
                              True)

        # Set angel resource parameters #worker, #task, #PS
        self.conf.set_int(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_PS_NUMBER, 1)

        # Set FM algorithm parameters #feature #epoch
        self.conf.set(MLConf.ML_FEATURE_NUM, str(feature_num))
        self.conf.set(MLConf.ML_EPOCH_NUM, str(epoch_num))
        self.conf.set(MLConf.ML_FM_RANK, str(rank))
        self.conf.set(MLConf.ML_LEARN_RATE, str(lr))
        self.conf.set(MLConf.ML_FM_REG0, str(reg0))
        self.conf.set(MLConf.ML_FM_REG1, str(reg1))
        self.conf.set(MLConf.ML_FM_REG2, str(reg2))
        self.conf.set(MLConf.ML_FM_V_STDDEV, str(stev))

    def train_on_local_cluster(self):
        """Train on the food_fm_libsvm data and save the model under the temp dir."""
        self.set_conf()
        input_path = "./src/test/data/fm/food_fm_libsvm"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        save_path = LOCAL_FS + TMP_PATH + "/model"
        log_path = LOCAL_FS + TMP_PATH + "/LRlog"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, save_path)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        # Set actionType train (a constant, as used in fm_classification)
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)

        runner = FMRunner()
        runner.train(self.conf)

        angel_client = AngelClientFactory.get(self.conf)
        angel_client.stop()

    def fm_classification(self):
        """Train on a9a with learn type "c" and a feature number of 124."""
        # Apply the shared defaults first, like train_on_local_cluster does;
        # the learn type and feature number are overridden below.
        self.set_conf()
        input_path = "./src/test/data/fm/a9a.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        save_path = LOCAL_FS + TMP_PATH + "/model"
        log_path = LOCAL_FS + TMP_PATH + "/LRlog"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, save_path)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        # Set actionType train
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)
        # Set learnType
        self.conf.set(MLConf.ML_FM_LEARN_TYPE, "c")
        # Set feature number
        self.conf.set(MLConf.ML_FEATURE_NUM, str(124))

        runner = FMRunner()
        runner.train(self.conf)

        angel_client = AngelClientFactory.get(self.conf)
        angel_client.stop()
Ejemplo n.º 5
0
class MFLocalExample(object):
    """Local-mode matrix factorization example driven by ``MatrixFactorizationRunner``."""

    def __init__(self):
        self.conf = Configuration()
        self.MLConf = MLConf()

    def set_conf(self):
        """Write deploy, resource, path and MF algorithm settings into ``self.conf``."""
        conf = self.conf
        train_data = "../../data/exampledata/MFLocalExampleData"

        # Run in local deploy mode.
        conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL")

        # Basic job keys.
        conf.set_boolean("mapred.mapper.new-api", True)
        input_format = 'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat'
        conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, input_format)
        conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, True)

        # One worker group, one task per worker, one parameter server.
        resource_keys = (
            AngelConf.ANGEL_WORKERGROUP_NUMBER,
            AngelConf.ANGEL_WORKER_TASK_NUMBER,
            AngelConf.ANGEL_PS_NUMBER,
        )
        for resource_key in resource_keys:
            conf.set_int(resource_key, 1)

        # Model and log locations are built from the local FS prefix and the
        # temp directory.
        tmp_root = LocalFileSystem.DEFAULT_FS + tempfile.gettempdir()

        # Training data, save-model and log paths.
        conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, train_data)
        conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, tmp_root + "/model")
        conf.set(AngelConf.ANGEL_LOG_PATH, tmp_root + "/log")
        # Action type: train.
        conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)

        # MF algorithm parameters.
        mf_params = (
            (MLConf.ML_MF_RANK, "200"),
            (MLConf.ML_EPOCH_NUM, "8"),
            (MLConf.ML_MF_ROW_BATCH_NUM, "2"),
            (MLConf.ML_MF_ITEM_NUM, "1683"),
            (MLConf.ML_MF_LAMBDA, "0.01"),
            (MLConf.ML_MF_ETA, "0.0054"),
        )
        for param_key, param_value in mf_params:
            conf.set(param_key, param_value)

    def train(self):
        """Apply the configuration and run a training job."""
        self.set_conf()
        MatrixFactorizationRunner().train(self.conf)
Ejemplo n.º 6
0
class FMLocalExample(object):
    """Local-mode factorization machine example driven by ``FMRunner``.

    Both entry points call ``set_conf`` first, run a training job, then fetch
    the Angel client for the configuration and stop it.
    """

    def __init__(self):
        self.conf = Configuration()

    def set_conf(self):
        """
        Set parameter values of self.conf
        """

        # Feature number of train data
        feature_num = 236
        # Total iteration number
        epoch_num = 20
        # Rank
        rank = 5
        # Regularization parameters
        reg0 = 0.0
        reg1 = 0.0
        reg2 = 0.001
        # Learning rate
        lr = 0.001
        # Value stored under MLConf.ML_FM_V_STDDEV
        stev = 0.1

        # Set local deploy mode
        self.conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL")

        # Set basic configuration keys
        self.conf.set_boolean("mapred.mapper.new-api", True)
        self.conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, 'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat')
        self.conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, True)

        # Set angel resource parameters #worker, #task, #PS
        self.conf.set_int(AngelConf.ANGEL_WORKERGROUP_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_WORKER_TASK_NUMBER, 1)
        self.conf.set_int(AngelConf.ANGEL_PS_NUMBER, 1)

        # Set FM algorithm parameters #feature #epoch
        self.conf.set(MLConf.ML_FEATURE_NUM, str(feature_num))
        self.conf.set(MLConf.ML_EPOCH_NUM, str(epoch_num))
        self.conf.set(MLConf.ML_FM_RANK, str(rank))
        self.conf.set(MLConf.ML_LEARN_RATE, str(lr))
        self.conf.set(MLConf.ML_FM_REG0, str(reg0))
        self.conf.set(MLConf.ML_FM_REG1, str(reg1))
        self.conf.set(MLConf.ML_FM_REG2, str(reg2))
        self.conf.set(MLConf.ML_FM_V_STDDEV, str(stev))

    def train_on_local_cluster(self):
        """Train on the food_fm_libsvm data and save the model under the temp dir."""
        self.set_conf()
        input_path = "./src/test/data/fm/food_fm_libsvm"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        save_path = LOCAL_FS + TMP_PATH + "/model"
        log_path = LOCAL_FS + TMP_PATH + "/LRlog"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, save_path)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        # Set actionType train (a constant, as used in fm_classification)
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)

        runner = FMRunner()
        runner.train(self.conf)

        angel_client = AngelClientFactory.get(self.conf)
        angel_client.stop()

    def fm_classification(self):
        """Train on a9a with learn type "c" and a feature number of 124."""
        # Apply the shared defaults first, like train_on_local_cluster does;
        # the learn type and feature number are overridden below.
        self.set_conf()
        input_path = "./src/test/data/fm/a9a.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        save_path = LOCAL_FS + TMP_PATH + "/model"
        log_path = LOCAL_FS + TMP_PATH + "/LRlog"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, save_path)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        # Set actionType train
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)
        # Set learnType
        self.conf.set(MLConf.ML_FM_LEARN_TYPE, "c")
        # Set feature number
        self.conf.set(MLConf.ML_FEATURE_NUM, str(124))

        runner = FMRunner()
        runner.train(self.conf)

        angel_client = AngelClientFactory.get(self.conf)
        angel_client.stop()
Ejemplo n.º 7
0
class LinearRegLocalExample(object):
    """
    Linear Regression Example used for user test, similar to "com.tencent.angel.example.ml.LinearRegLocalExample".

    Every entry point calls ``set_conf`` first and then hands ``self.conf``
    to a ``LinearRegRunner``.
    """
    def __init__(self):
        self.conf = Configuration()

    def set_conf(self):
        """
        Set up self.conf for the runtime environment.
        """
        # Feature number of train data
        feature_num = 101
        # Total iteration number
        epoch_num = 20
        # Validation sample ratio
        v_ratio = 0.5
        # Data format, libsvm or dummy
        data_fmt = 'libsvm'
        # Batch sample ratio (stored under MLConf.ML_BATCH_SAMPLE_Ratio)
        sp_ratio = 1

        # Learning rate
        learn_rate = 0.1
        # Decay of learning rate
        decay = 0.01
        # Regularization coefficient
        reg = 0

        # Set job queue, if you use YARN deploy mode, you can set job queue by
        # self.conf.set('mapreduce.job.queue.name', 'default')

        # Set local deploy mode
        self.conf.set(AngelConf.ANGEL_DEPLOY_MODE, 'LOCAL')

        # Set basic configuration keys
        self.conf.set_boolean('mapred.mapper.new-api', True)
        self.conf.set(
            AngelConf.ANGEL_INPUTFORMAT_CLASS,
            'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat')
        self.conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST,
                              True)

        # Set data format
        self.conf.set(MLConf.ML_DATAFORMAT, data_fmt)

        # Set angel resource parameters #worker, #task, #PS
        self.conf.set_int(AngelConf.ANGEL_WORKERGROUP_NUMBER, 2)
        self.conf.set_int(AngelConf.ANGEL_WORKER_TASK_NUMBER, 10)
        self.conf.set_int(AngelConf.ANGEL_PS_NUMBER, 2)

        # Set algorithm parameters #feature #epoch
        self.conf.set(MLConf.ML_FEATURE_NUM, str(feature_num))
        self.conf.set(MLConf.ML_EPOCH_NUM, str(epoch_num))
        self.conf.set(MLConf.ML_BATCH_SAMPLE_Ratio, str(sp_ratio))
        self.conf.set(MLConf.ML_VALIDATE_RATIO, str(v_ratio))
        self.conf.set(MLConf.ML_LEARN_RATE, str(learn_rate))
        self.conf.set(MLConf.ML_LEARN_DECAY, str(decay))
        self.conf.set(MLConf.ML_REG_L2, str(reg))

    def train_on_local_cluster(self):
        """
        Train model on local cluster
        """
        self.set_conf()
        input_path = '../data/exampledata/LinearRegression'
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        # Same relative log directory that inc_train uses.
        log_path = "./src/test/log"
        # NOTE(review): the model is saved here, while inc_train/predict load
        # from LOCAL_FS + TMP_PATH + "/model" -- confirm these are meant to match.
        model_path = 'file:///tmp/angel/model'

        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, model_path)
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)
        self.conf.set("fs.defaultFS", LOCAL_FS + TMP_PATH)

        runner = LinearRegRunner()
        runner.train(self.conf)

    def inc_train(self):
        """Incrementally train from ``<tmp>/model`` and save to ``<tmp>/newmodel``."""
        self.set_conf()
        input_path = "../data/exampledata/LinearRegression/LinearReg100.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()
        log_path = "./src/test/log"

        # Set training data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set load model path
        self.conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH,
                      LOCAL_FS + TMP_PATH + "/model")
        # Set save model path
        self.conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH,
                      LOCAL_FS + TMP_PATH + "/newmodel")
        # Set actionType incremental train (a constant, like ANGEL_ML_TRAIN)
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_INC_TRAIN)
        # Set log path
        self.conf.set(AngelConf.ANGEL_LOG_PATH, log_path)

        runner = LinearRegRunner()
        # NOTE(review): assumes LinearRegRunner exposes snake_case inc_train,
        # as the Python runner API does for LRRunner -- confirm.
        runner.inc_train(self.conf)

    def predict(self):
        """Predict with the model under ``<tmp>/model``; results go to ``<tmp>/predict``."""
        self.set_conf()
        input_path = "../data/exampledata/LinearRegression/LinearReg100.train"
        LOCAL_FS = LocalFileSystem.DEFAULT_FS
        TMP_PATH = tempfile.gettempdir()

        # Set input data path
        self.conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, input_path)
        # Set load model path
        self.conf.set(AngelConf.ANGEL_LOAD_MODEL_PATH,
                      LOCAL_FS + TMP_PATH + "/model")
        # Set predict result path
        self.conf.set(AngelConf.ANGEL_PREDICT_PATH,
                      LOCAL_FS + TMP_PATH + "/predict")
        # Set actionType prediction (a constant, like ANGEL_ML_TRAIN)
        self.conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_PREDICT)

        runner = LinearRegRunner()
        runner.predict(self.conf)
Ejemplo n.º 8
0
class MFLocalExample(object):
    """Local-mode matrix factorization example driven by ``MatrixFactorizationRunner``."""

    def __init__(self):
        self.conf = Configuration()
        self.MLConf = MLConf()

    def set_conf(self):
        """Write deploy, resource, path and MF algorithm settings into ``self.conf``."""
        conf = self.conf
        train_data = "../../data/exampledata/MFLocalExampleData"

        # Run in local deploy mode.
        conf.set(AngelConf.ANGEL_DEPLOY_MODE, "LOCAL")

        # Basic job keys.
        conf.set_boolean("mapred.mapper.new-api", True)
        input_format = 'org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat'
        conf.set(AngelConf.ANGEL_INPUTFORMAT_CLASS, input_format)
        conf.set_boolean(AngelConf.ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST, True)

        # One worker group, one task per worker, one parameter server.
        resource_keys = (
            AngelConf.ANGEL_WORKERGROUP_NUMBER,
            AngelConf.ANGEL_WORKER_TASK_NUMBER,
            AngelConf.ANGEL_PS_NUMBER,
        )
        for resource_key in resource_keys:
            conf.set_int(resource_key, 1)

        # Model and log locations are built from the local FS prefix and the
        # temp directory.
        tmp_root = LocalFileSystem.DEFAULT_FS + tempfile.gettempdir()

        # Training data, save-model and log paths.
        conf.set(AngelConf.ANGEL_TRAIN_DATA_PATH, train_data)
        conf.set(AngelConf.ANGEL_SAVE_MODEL_PATH, tmp_root + "/model")
        conf.set(AngelConf.ANGEL_LOG_PATH, tmp_root + "/log")
        # Action type: train.
        conf.set(AngelConf.ANGEL_ACTION_TYPE, MLConf.ANGEL_ML_TRAIN)

        # MF algorithm parameters.
        mf_params = (
            (MLConf.ML_MF_RANK, "200"),
            (MLConf.ML_EPOCH_NUM, "8"),
            (MLConf.ML_MF_ROW_BATCH_NUM, "2"),
            (MLConf.ML_MF_ITEM_NUM, "1683"),
            (MLConf.ML_MF_LAMBDA, "0.01"),
            (MLConf.ML_MF_ETA, "0.0054"),
        )
        for param_key, param_value in mf_params:
            conf.set(param_key, param_value)

    def train(self):
        """Apply the configuration and run a training job."""
        self.set_conf()
        MatrixFactorizationRunner().train(self.conf)