Example #1
    def __init__(self,
                 host="localhost",
                 port=8080,
                 sname="",
                 description="",
                 mllib="caffe",
                 service_parameters_input=None,
                 service_parameters_mllib=None,
                 service_parameters_output=None,
                 model=None,
                 tmp_dir=None):

        self.host = host
        self.port = port
        self.sname = sname
        self.model = model
        self.description = description
        self.mllib = mllib
        self.tmp_dir = tmp_dir

        self.service_parameters_input = service_parameters_input
        self.service_parameters_mllib = service_parameters_mllib
        self.service_parameters_output = service_parameters_output

        self.n_pred = 0
        self.n_fit = 0
        self.calls = []
        self.answers = []
        # self.train_logs = None
        super(AbstractModels, self).__init__(self.host, self.port)

        if self.sname:
            for service in self.get_info()['head']['services']:
                if service['name'] == self.sname.lower():  # DD lowercases service names
                    self.delete_service(self.sname, clear="mem")
        else:
            self.sname = "pyDD_{}".format(time_utils.fulltimestamp())
            self.description = self.sname

        # Check whether a repository is given, otherwise create one
        if "repository" not in self.model or not self.model["repository"]:
            self.repository = tempfile.mkdtemp(prefix="pydd_",
                                               dir=self.tmp_dir)
            self.model["repository"] = self.repository
            os_utils._create_dirs([self.model["repository"]])
        else:
            assert os.path.exists(self.model["repository"]), \
                "{} does not exist".format(self.model["repository"])

        json_dump = self.create_service(self.sname, self.model,
                                        self.description, self.mllib,
                                        self.service_parameters_input,
                                        self.service_parameters_mllib,
                                        self.service_parameters_output)
        self.answers.append(json_dump)

        with open("{}/model.json".format(self.model["repository"])) as f:
            self.calls = [json.loads(line) for line in f]
Example #2
class XGB(AbstractModels):
    """
    XGBoost

    # General parameters:
    Parameter	    Type	    Optional    Default	        Description
    objective	    string	    yes	        multi:softprob	objective function, among multi:softprob, binary:logistic, reg:linear, reg:logistic
    booster	        string	    yes	        gbtree	        which booster to use, gbtree or gblinear
    num_feature	    int	        yes	        set by xgboost	maximum dimension of the feature
    eval_metric	    string	    yes	        obj dependent	evaluation metric internal to xgboost
    base_score	    double	    yes	        0.5	            initial prediction score, global bias
    seed	        int	        yes	        0	            random number seed
    iterations	    int	        no	        N/A	            number of boosting iterations
    test_interval	int	        yes	        1	            number of iterations between each testing pass
    save_period	    int	        yes	        0	            number of iterations between model saving to disk

    # Booster parameters
    Parameter	        Type	    Optional    Default	    Description
    eta	                double	    yes	        0.3	        step size shrinkage
    gamma	            double	    yes	        0	        minimum loss reduction
    max_depth	        int	        yes	        6	        maximum depth of a tree
    min_child_weight	int	        yes	        1	        minimum sum of instance weight
    max_delta_step	    int	        yes	        0	        maximum delta step
    subsample	        double	    yes	        1.0	        subsample ratio of training instances
    colsample	        double	    yes	        1.0	        subsample ratio of columns when constructing each tree
    lambda	            double	    yes	        1.0	        L2 regularization term on weights
    alpha	            double	    yes	        0.0	        L1 regularization term on weights
    lambda_bias	        double	    yes	        0.0	        L2 regularization for linear booster
    tree_method	        string	    yes	        auto	    tree construction algorithm, from auto, exact, approx

    # Example calls

    curl -X PUT "http://localhost:8080/services/affxgb" -d
    "{\"mllib\":\"xgboost\",\"description\":\"classification
    service\",\"type\":\"supervised\",\"parameters\":{\"input\":{\"connector\":\"svm\"},\"mllib\":{\"nclasses\":2}},\"model\":{\"repository\":\"/path/to/model\"}}"

    curl -X POST "http://localhost:8080/train" -d
    "{\"service\":\"testxgb\",\"async\":true,\"parameters\":{\"mllib\":{\"iterations\":100,\"test_interval\":10,\"objective\":\"binary:logistic\",\"booster_params\":{\"max_depth\":30}},\"input\":{},\"output\":{\"measure\":[\"auc\",\"mcll\",\"f1\"]}},\"data\":[\"/path/to/X_train.svm\",\"/path/to/X_test.svm\"]}"
    """

    def __init__(self, host="localhost",
                 port=8080,
                 sname="",
                 description="",
                 repository="",
                 connector="svm",
                 mllib="xgboost",
                 nclasses=2,
                 ntargets=None,
                 tmp_dir=None):
        self.host = host
        self.port = port
        self.sname = sname
        self.mllib = mllib
        self.description = description
        self.repository = repository
        self.connector = connector
        self.nclasses = nclasses
        self.ntargets = ntargets
        self.tmp_dir = tmp_dir

        self.model = {"repository": self.repository}
        self.service_parameters_mllib = {"nclasses": self.nclasses, "ntargets": self.ntargets}
        self.service_parameters_input = {"connector": self.connector}
        self.service_parameters_output = {}

        super(XGB, self).__init__(host=self.host, port=self.port, sname=self.sname,
                                  description=self.description, mllib=self.mllib,
                                  service_parameters_input=self.service_parameters_input,
                                  service_parameters_output=self.service_parameters_output,
                                  service_parameters_mllib=self.service_parameters_mllib,
                                  model=self.model, tmp_dir=self.tmp_dir)

    def fit(self, train_data, validation_data=[],
            objective="multi:softprob",
            booster="gbtree",
            eval_metric="auc",
            base_score=0.5,
            seed=0,
            nthread=-1,
            iterations=10,
            test_interval=1,
            save_period=0,
            metrics=["auc", "acc"],
            async=True,  # "async" became a reserved keyword in Python 3.7; this snippet targets older interpreters
            display_metric_interval=1,
            **booster_params):

        # db: True otherwise core dump when training on svm data
        self.train_parameters_input = {}

        self.train_parameters_output = {"measure": metrics}

        self.train_parameters_mllib = {
            "objective": objective,
            "booster": booster,
            "nthread": nthread,
            "eval_metric": eval_metric,
            "base_score": base_score,
            "seed": seed,
            "iterations": iterations,
            "test_interval": test_interval,
            "save_period": save_period,
            "booster_params": booster_params,
        }

        self.data = []

        if train_data.name == "svm":
            self.data.append(train_data.path)

            if validation_data:
                for connector in validation_data:
                    self.data.append(connector.path)

        elif train_data.name == "array":
            train_f = os.path.join(self.repository, "x_train_{}.svm".format(time_utils.fulltimestamp()))
            dump_svmlight_file(train_data.X, train_data.Y, train_f)
            self.data.append(train_f)

            if validation_data:
                for i, connector in enumerate(validation_data):
                    valid_f = os.path.join(self.repository, "x_val{}_{}.svm".format(i, time_utils.fulltimestamp()))
                    dump_svmlight_file(connector.X, connector.Y, valid_f)
                    self.data.append(valid_f)

        self.train_logs = self._train(self.data,
                                      self.train_parameters_input,
                                      self.train_parameters_mllib,
                                      self.train_parameters_output, display_metric_interval, async)
        return self.train_logs
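
A minimal usage sketch for the XGB wrapper above, mirroring the curl calls in its docstring. It assumes a DeepDetect server on localhost:8080 and an import path of pydd.XGB, and it stands in for pyDD's data connectors with a simple namedtuple exposing only the .name and .path attributes that fit() reads; the real connector classes may differ.

from collections import namedtuple

from pydd.XGB import XGB  # import path is an assumption

# Hypothetical stand-in for an svm-format connector; fit() only reads .name and .path here
SVMData = namedtuple("SVMData", ["name", "path"])

train = SVMData(name="svm", path="/path/to/X_train.svm")
valid = SVMData(name="svm", path="/path/to/X_test.svm")

clf = XGB(host="localhost", port=8080, nclasses=2, connector="svm")
logs = clf.fit(train, validation_data=[valid],
               objective="binary:logistic",
               iterations=100, test_interval=10,
               metrics=["auc", "mcll", "f1"],
               max_depth=30)  # extra keyword arguments are forwarded as booster_params
print(logs)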
Example #3
File: MLP.py, Project: SuperChevre/pyDD
    def __init__(self,
                 host='localhost',
                 port=8080,
                 sname='',
                 mllib='caffe',
                 description='',
                 repository='',
                 templates='../templates/caffe',
                 connector='svm',
                 nclasses=None,
                 ntargets=None,
                 gpu=False,
                 gpuid=0,
                 template='mlp',
                 layers=[50],
                 activation='relu',
                 dropout=0.5,
                 regression=False,
                 finetuning=False,
                 db=True):
        self.host = host
        self.port = port
        self.sname = sname
        self.mllib = mllib
        self.description = description
        self.repository = repository
        self.templates = templates
        self.connector = connector
        self.nclasses = nclasses
        self.ntargets = ntargets
        self.gpu = gpu
        self.gpuid = gpuid
        self.template = template
        self.layers = layers
        self.activation = activation
        self.dropout = dropout
        self.regression = regression
        self.finetuning = finetuning
        self.db = db

        self.params = {
            'host': self.host,
            'port': self.port,
            'sname': self.sname,
            'mllib': self.mllib,
            'description': self.description,
            'repository': self.repository,
            'templates': self.templates,
            'connector': self.connector,
            'nclasses': self.nclasses,
            'ntargets': self.ntargets,
            'gpu': self.gpu,
            'gpuid': self.gpuid,
            'template': self.template,
            'layers': self.layers,
            'activation': self.activation,
            'dropout': self.dropout,
            'regression': self.regression,
            'finetuning': self.finetuning,
            'db': self.db,
        }

        self.n_pred = 0
        self.n_fit = 0
        self.calls = []
        self.answers = []
        self.model = {
            'templates': self.templates,
            'repository': self.repository
        }
        self.service_parameters_mllib = {
            'nclasses': self.nclasses,
            'ntargets': self.ntargets,
            'gpu': self.gpu,
            'gpuid': self.gpuid,
            'template': self.template,
            'layers': self.layers,
            'activation': self.activation,
            'dropout': self.dropout,
            'regression': self.regression,
            'finetuning': self.finetuning,
            'db': self.db
        }
        self.service_parameters_input = {'connector': self.connector}
        self.service_parameters_output = {}
        super(genericMLP, self).__init__(self.host, self.port)

        if self.sname == '':
            self.sname = "pyDD_MLP_{}".format(time_utils.fulltimestamp())
            self.description = self.sname
        else:
            self.delete_service(self.sname, "mem")

        tmp_dir = tempfile.mkdtemp()
        self.data_folder = "{}/data".format(tmp_dir)
        if self.model['repository'] == '':
            self.model['repository'] = "{}/model".format(tmp_dir)
        os_utils._create_dirs([self.model['repository'], self.data_folder])

        json_dump = self.create_service(self.sname, self.model,
                                        self.description, self.mllib,
                                        self.service_parameters_input,
                                        self.service_parameters_mllib,
                                        self.service_parameters_output)
        self.answers.append(json_dump)

        with open("{}/model.json".format(self.model['repository'])) as f:
            self.calls = [json.loads(line) for line in f]
Example #4
            if validation_data:

                if len(validation_data) == 1:
                    test_data = validation_data[0]
                    self.data.append(test_data.path)
                    if test_data.lmdb_path:
                        os.symlink(test_data.lmdb_path, os.path.join(self.repository, "test.lmdb"))
                else:
                    for i, connector in enumerate(validation_data):
                        self.data.append(connector.path)
                        if connector.lmdb_path:
                            os.symlink(connector.lmdb_path, os.path.join(self.repository, "test_{}.lmdb".format(i+1)))

        elif train_data.name == "array":
            train_f = os.path.join(self.repository, "x_train_{}.svm".format(time_utils.fulltimestamp()))
            dump_svmlight_file(train_data.X, train_data.Y, train_f)
            self.data.append(train_f)

            if validation_data:
                for i, conn in enumerate(validation_data):
                    valid_f = os.path.join(self.repository, "x_val{}_{}.svm".format(i, time_utils.fulltimestamp()))
                    dump_svmlight_file(conn.X, conn.Y, valid_f)
                    self.data.append(valid_f)

        self.train_logs = self._train(self.data,
                                      self.train_parameters_input,
                                      self.train_parameters_mllib,
                                      self.train_parameters_output, display_metric_interval, async)

        if train_data.lmdb_path:
Example #5
File: MLP.py, Project: SuperChevre/pyDD
    def fit(self,
            X,
            Y=None,
            validation_data=[],
            iterations=100,
            test_interval=None,
            solver_type='SGD',
            base_lr=0.1,
            lr_policy=None,
            stepsize=None,
            momentum=None,
            weight_decay=None,
            power=None,
            gamma=None,
            iter_size=1,
            batch_size=128,
            metrics=['mcll', 'accp'],
            class_weights=None):

        self.filepaths = []
        if type(X) == np.ndarray or sparse.issparse(X):
            train_f = os.path.join(
                self.data_folder,
                "x_train_{}.svm".format(time_utils.fulltimestamp()))
            dump_svmlight_file(X, Y, train_f)
            self.filepaths.append(train_f)

            if len(validation_data) > 0:
                for i, (x_val, y_val) in enumerate(validation_data):
                    valid_f = os.path.join(
                        self.data_folder,
                        "x_val{}_{}.svm".format(i, time_utils.fulltimestamp()))
                    dump_svmlight_file(x_val, y_val, valid_f)
                    self.filepaths.append(valid_f)

        elif type(X) == list:
            self.filepaths = X
        elif type(X) == str:
            self.filepaths = [X]
        else:
            raise TypeError("X must be a numpy array, a sparse matrix, a list of file paths, or a single path string")

        # db: True otherwise core dump when training on svm data
        self.train_parameters_input = {'db': True}
        self.train_parameters_output = {"measure": metrics}
        self.train_parameters_mllib = {
            'gpu': self.service_parameters_mllib['gpu'],
            'solver': {
                'iterations': iterations,
                'test_interval': test_interval,
                'base_lr': base_lr,
                'solver_type': solver_type,
                'lr_policy': lr_policy,
                'stepsize': stepsize,
                'momentum': momentum,
                'weight_decay': weight_decay,
                'power': power,
                'gamma': gamma,
                'iter_size': iter_size
            },
            'net': {
                'batch_size': batch_size
            },
            'class_weights': class_weights if class_weights
            else [1.] * self.service_parameters_mllib['nclasses']
        }

        if self.n_fit > 0:
            self.delete_service(self.sname, "mem")
            if 'template' in self.service_parameters_mllib:
                self.service_parameters_mllib.pop('template')

            self.create_service(self.sname, self.model, self.description,
                                self.mllib, self.service_parameters_input,
                                self.service_parameters_mllib,
                                self.service_parameters_output)

        json_dump = self.post_train(self.sname,
                                    self.filepaths,
                                    self.train_parameters_input,
                                    self.train_parameters_mllib,
                                    self.train_parameters_output,
                                    async=True)
        time.sleep(1)
        self.answers.append(json_dump)
        with open("{}/model.json".format(self.model['repository'])) as f:
            self.calls = [json.loads(line) for line in f]

        self.n_fit += 1

        train_status = ''
        while True:
            train_status = self.get_train(self.sname, job=1, timeout=2)
            if train_status['head']['status'] == 'running':
                print(train_status['body']['measure'])
            else:
                print(train_status)
                break