Code Example #1
def custom_model(conf, model_fn, input_data, **kw):
    """
    :param model_fn:
    :param params:
    :param input_data:
    :param kw:
    :return:
    """
    project_id = kw.pop('project_id', None)
    job_id = kw.pop('job_id', None)
    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    user_ID = ow.user.user_ID
    result_sds = kw.pop('result_sds', None)
    result_dir = kw.pop('result_dir', None)
    est_params = conf.get('estimator', None)
    fit_params = conf.get('fit', {})
    eval_params = conf.get('evaluate', {})

    if result_sds is None:
        raise RuntimeError('no result sds id passed to model')
    # if project_id is None:
    #     raise RuntimeError('no project_id input')

    # def eval_input_fn():
    #     return input_fn(test, continuous_cols, categorical_cols, label_col)

    logging_flag = kw.pop('logging', True)
    return custom_model_help(model_fn, input_data, project_id, job_id, user_ID,
                             result_dir, result_sds, est_params, fit_params,
                             eval_params, logging_flag)
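A hypothetical invocation, assuming the conf sections and keyword names this function actually reads; model_fn and input_data come from the caller, and the ObjectId strings are placeholders:

conf = {
    'estimator': {},   # becomes est_params
    'fit': {},         # becomes fit_params
    'evaluate': {},    # becomes eval_params
}
result = custom_model(conf, model_fn, input_data,
                      project_id='<project-oid>', job_id='<job-oid>',
                      result_sds='<result-sds-oid>',
                      result_dir='/tmp/results', logging=True)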
Code Example #2
def main(unused_argv):
    job_id = FLAGS.job_id
    # the hardcoded ObjectId is presumably the flag's default value, so
    # receiving it means no real job_id was passed on the command line
    if job_id == "59ae047e0c11f35fafebc422":
        raise ValueError('no job_id flag')
    job = job_business.get_by_job_id(job_id)
    # project id
    project_id = job.project.id
    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    # user ID
    user_ID = ow.user.user_ID
    args = job.run_args

    try:
        run_model(args['conf'], args['project_id'], args['data_source_id'],
                  args['model_id'], job_id, **args['kwargs'])
    except Exception:
        # if error send error, save error and raise error
        exc_type, exc_value, exc_traceback = sys.exc_info()
        message = {
            'error': repr(traceback.format_exception(exc_type, exc_value,
                                                     exc_traceback)),
            'type': 'model'
        }
        print(message)
        emit_error(message, str(project_id), job_id=job_id, user_ID=user_ID)
        save_job_status(job, error=message, status=300)
    else:
        message = {
            'project_name': project.name,
            'type': 'model',
            'complete': True,
            'content': 'Model job completed in project ' + project.name
        }
        emit_success(message, str(project_id), job_id=job_id, user_ID=user_ID)
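For context, a minimal sketch of how FLAGS.job_id might be defined (TF 1.x-style tf.app.flags is an assumption here); the hardcoded ObjectId above would then be the flag's default, which is why receiving it is treated as "no job_id passed":

import tensorflow as tf

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('job_id', '59ae047e0c11f35fafebc422',
                           'id of the job to run')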
Code Example #3
def run_job():
    data = request.get_json()
    job_id = data['section_id']
    project_id = data["project_id"]

    job_obj = job_business.get_by_job_id(job_id)
    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    # user ID
    user_ID = ow.user.user_ID
    # avoid shadowing the builtin 'type'
    job_type = None
    try:
        if job_obj.toolkit:
            job_type = 'toolkit'
            complete = True
            content = 'Toolkit job completed in project ' + project.name
            result = job_service.run_toolkit_job(project_id=project_id,
                                                 job_obj=job_obj)
        elif job_obj.model:
            job_type = 'model'
            complete = False
            content = 'Model job successfully created in project ' + \
                      project.name
            result = job_service.run_model_job(project_id=project_id,
                                               job_obj=job_obj)
        else:
            return jsonify({"response":
                            'no model and toolkit in job object'}), 400
        result = json_utility.convert_to_json(result)
    except Exception as e:
        # if error send error, save error and raise error
        exc_type, exc_value, exc_traceback = sys.exc_info()
        message = {
            'error': repr(traceback.format_exception(exc_type, exc_value,
                                                     exc_traceback)),
            'type': job_type
        }
        print(message)
        emit_error(message, str(project_id), job_id=job_id, user_ID=user_ID)
        save_job_status(job_obj, error=message, status=300)
        raise e
        # return jsonify({
        #     "response": {
        #         "result": message
        #     }}), 200
    else:
        message = {
            'project_name': project.name,
            'type': job_type,
            'complete': complete,
            'content': content
        }
        emit_success(message, str(project_id), job_id=job_id, user_ID=user_ID)
        return jsonify({"response": {"result": result}}), 200
Code Example #4
File: job_service.py Project: zjn0224/mo
        def wrapper(*args, **kw):
            # create a job
            # model_obj is used below, so restore the commented-out lookup
            model_obj = model_business.get_by_model_id(model_id)
            result_dir = kw.get('result_dir')  # was kwargs.get(...), but only kw is in scope here

            project_obj = project_business.get_by_id(project_id)

            job_obj = job_business.get_by_job_id(job_id)

            # update a project
            project_business.insert_job_by_id(project_id, job_obj.id)
            project_business.update_items_to_list_field(project_id,
                                                        related_tasks=TYPE.get(
                                                            model_obj.category,
                                                            []))
            # create result sds for model
            sds_name = '%s_%s_result' % (model_obj['name'], job_obj['id'])
            try:
                sds = staging_data_set_business.get_by_job_id(job_obj.id)
            except DoesNotExist:
                print('free to create sds')
            else:
                staging_data_set_business.remove_by_id(sds.id)
            finally:
                result_sds_obj = staging_data_set_business.add(sds_name,
                                                               'des',
                                                               project_obj,
                                                               job=job_obj,
                                                               type='result')

            # run
            if result_dir:
                # result_dir += str(job_obj['id']) + '/'
                try:
                    os.makedirs(result_dir)
                except FileExistsError:
                    print('dir exists, no need to create')
                kw['result_dir'] = result_dir

            # generate_job_py(func, *args, **kw, result_sds=result_sds_obj,
            #                 project_id=project_id)

            func_result = func(*args,
                               **kw,
                               result_sds=result_sds_obj,
                               project_id=project_id,
                               job_id=job_id)
            # update a job
            job_business.end_job(job_obj)
            if isinstance(func_result, dict):
                func_result['job_id'] = str(job_obj['id'])

            return func_result
Code Example #5
def to_code():
    data = request.get_json()
    job_id = data['section_id']
    project_id = data["project_id"]

    job_obj = job_business.get_by_job_id(job_id)
    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    # user ID
    user_ID = ow.user.user_ID
    code = job_service.model_job_to_code(project_id=project_id,
                                         job_obj=job_obj)
    return jsonify({"response": {"code": code}}), 200
Code Example #6
def mlp(conf, input, **kw):
    result_sds = kw.pop('result_sds', None)
    project_id = kw.pop('project_id', None)
    result_dir = kw.pop('result_dir', None)
    job_id = kw.pop('job_id', None)
    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    user_ID = ow.user.user_ID
    f = conf['fit']
    e = conf['evaluate']
    x_train = input['x_tr']
    y_train = input['y_tr']
    # note: validation and test both reuse the x_te/y_te split here
    x_val = input['x_te']
    y_val = input['y_te']
    x_test = input['x_te']
    y_test = input['y_te']

    with graph.as_default():
        return mlp_main(result_sds, project_id, job_id, user_ID, result_dir,
                        x_train, y_train, x_val, y_val, x_test, y_test, f, e)
Code Example #7
def remove_project_by_id(project_id, user_ID):
    """
    remove project by its object_id
    :param project_id: object_id of project to remove
    :return:
    """
    project = project_business.get_by_id(project_id)
    # check ownership
    ownership = ownership_business.get_ownership_by_owned_item(project,
                                                               'project')
    if user_ID != ownership.user.user_ID:
        raise ValueError('project does not belong to this user, cannot delete')
    # delete tmp jupyterhub user
    delete_hub_user(user_ID, project.name)
    # delete project directory
    project_directory = UPLOAD_FOLDER + user_ID + '/' + project.name
    if os.path.isdir(project_directory):
        shutil.rmtree(project_directory)
    # delete project object
    return project_business.remove_by_id(project_id)
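A minimal usage sketch, assuming the caller already knows the requesting user's ID (both arguments are placeholders):

try:
    remove_project_by_id('<project-oid>', '<user-id>')
except ValueError:
    pass  # project belongs to another user; nothing was deleted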
Code Example #8
def update_project(project_id, name, description, is_private=True,
                   related_fields=[], tags=[], related_tasks=[],
                   done_indices=[]):
    """
    Create a new project

    :param name: str
    :param description: str
    :param user_ID: ObjectId
    :param is_private: boolean
    :return: a new created project object
    """
    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    ownership_business.update_by_id(ow['id'], private=is_private)
    project_business.update_by_id(project_id, name=name,
                                  description=description,
                                  update_time=datetime.utcnow(),
                                  related_fields=related_fields,
                                  tags=tags, related_tasks=related_tasks,
                                  done_indices=done_indices)
Code Example #9
def get_by_id(project_id):
    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    project.is_private = ow.private
    return project
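A hypothetical call (placeholder id); the returned project carries the privacy flag copied from its ownership record:

project = get_by_id('<project-oid>')
print(project.name, project.is_private)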
Code Example #10
File: model_service.py Project: zjn0224/mo
def kube_run_model(conf, project_id, data_source_id, model_id, job_obj,
                   **kwargs):
    # file_id = kwargs.get('file_id')
    staging_data_set_obj = None
    if data_source_id:
        staging_data_set_obj = \
            staging_data_set_business.get_by_id(data_source_id)
    project_obj = project_business.get_by_id(project_id)
    # file_dict = {'file': ObjectId(file_id)} if file_id else {}
    model_obj = model_business.get_by_model_id(model_id)

    run_args = {
        "conf": conf,
        "project_id": project_id,
        "data_source_id": data_source_id,
        "model_id": model_id,
        "kwargs": kwargs
    }

    job_obj = job_business.update_job_by_id(job_obj.id, model=model_obj,
                                            staging_data_set=staging_data_set_obj,
                                            project=project_obj, params=conf,
                                            run_args=run_args, status=100)

    job_id = str(job_obj.id)
    print(job_id)
    # NOTE: this early return runs the model in-process, making the
    # Kubernetes job creation below unreachable (likely a debug shortcut)
    return run_model(conf, project_id, data_source_id, model_id, job_id,
                     **kwargs)
    cwd = os.getcwd()
    job_name = job_id + '-training-job'
    client = kube_service.client
    try:
        # TODO need to terminate running pod
        kube_service.delete_job(job_name)
        while True:
            kube_service.get_job(job_name)
            time.sleep(1)
    except client.rest.ApiException:
        print('job does not exist or was deleted, ok to create')

    kube_json = {
        "apiVersion": "batch/v1",
        "kind": "Job",
        "metadata": {
            "name": job_name
        },
        "spec": {
            "template": {
                "metadata": {
                    "labels": {
                        "app": job_id
                    }
                },
                "spec": {
                    "containers": [
                        {
                            "name": job_id,
                            "image": "10.52.14.192/gzyw/model_app_pre",
                            "imagePullPolicy": "IfNotPresent",
                            "securityContext": {
                                "privileged": True,
                            },
                            "stdin": True,
                            "command": ["/usr/local/bin/python"],
                            "args": [
                                "run_model.py",
                                "--job_id", job_id
                            ],
                            "volumeMounts": [
                                {
                                    "mountPath": "/pyserver/user_directory",
                                    "name": "nfsvol"
                                },
                            ]
                        }
                    ],
                    "restartPolicy": "Never",
                    # "activeDeadlineSeconds": 1,
                    "volumes": [
                        {
                            "name": "nfsvol",
                            "persistentVolumeClaim": {
                                "claimName": "nfs-pvc"
                            }
                        },
                    ]
                },
            },
        }
    }
    # file_utils.write_to_filepath(json.dumps(kube_json), './model_app.json')
    # return
    api = kube_service.job_api
    resp = api.create_namespaced_job(body=kube_json, namespace=NAMESPACE)
    print("Job created. status='%s'" % str(resp.status))
    return {'job_id': job_id}
Code Example #11
def unpublish_project(project_id):
    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    return ownership_business.update_by_id(ow['id'], private=True)
Code Example #12
File: job_service.py Project: zjn0224/mo
        def wrapper(*args, **kw):

            # create a job
            staging_data_set_obj = staging_data_set_business.get_by_id(
                staging_data_set_id)
            project_obj = project_business.get_by_id(project_id)
            job_spec = {
                "fields": {
                    "source": fields[0],
                    "target": fields[1]
                },
                "params": kw
            }
            job_obj = job_business.add_toolkit_job(toolkit_obj,
                                                   staging_data_set_obj,
                                                   project_obj, **job_spec)
            # update a project
            project_business.insert_job_by_id(project_id, job_obj.id)

            # calculate
            func_rst = func(*args, **kw)
            result = list(func_rst) if isinstance(func_rst,
                                                  tuple) else [func_rst]

            # newly designed storage/retrieval format
            results = {"fields": {"source": fields[0], "target": fields[1]}}
            gen_info = []
            result_spec = toolkit_obj.result_spec

            for arg in result_spec["args"]:
                value = result.pop(0)
                results.update({arg["name"]: value})
                if arg["if_add_column"]:
                    # the column name cannot be Chinese, so use the entry function name
                    str_name = "%s_col" % toolkit_obj.entry_function
                    value = data_utility.retrieve_nan_index(value, nan_index)
                    try:
                        staging_data_service.update_many_with_new_fields(
                            value, nan_index, fields[0], str_name,
                            staging_data_set_id)
                    except (TypeError, ValueError) as e:
                        print("ERRORS in data saved to database")

                if arg.get("attribute", False) and arg["attribute"] == "label":
                    labels = value
                elif arg.get("attribute",
                             False) and arg["attribute"] == "general_info":
                    gen_info.append({
                        arg["name"]: {
                            "value": value,
                            "description": arg["des"]
                        }
                    })

            # visualization computations
            # clustering analysis
            if toolkit_obj.category == 0:
                json = {
                    "scatter":
                    data_utility.retrieve_nan_index(args[0], nan_index),
                    "labels":
                    labels,
                    "pie": [{
                        'name': el,
                        'value': labels.count(el)
                    } for el in set(labels)],
                    "centers":
                    results["Centroids of Clusters"],
                    "general_info":
                    gen_info,
                    "fields":
                    fields[0],
                    "category":
                    toolkit_obj.category
                }

            # feature selection
            elif toolkit_obj.category == 1:
                from scipy.stats import pearsonr
                # from minepy import MINE
                data = list(zip(*args[0]))
                target_flag = 1 if len(args) == 2 else 0
                target = args[1] if target_flag else None

                json = {
                    "Y_target": fields[1],
                    "X_fields": fields[0],
                    "labels": labels,
                    "bar": results["scores"],
                    "general_info": {
                        "Selected Features":
                        "%s out of %s" %
                        (len(list(filter(lambda x: x is True,
                                         labels))), len(fields[0])),
                        "Selected Fields":
                        " ".join(
                            str(el)
                            for el in list(compress(fields[0], labels))),
                        "Number of NaN":
                        len(nan_index)
                    },
                    "scatter": {
                        "y_domain":
                        target,
                        "x_domain":
                        data,
                        "pearsonr": [
                            pearsonr(el, target)[0] if target_flag else None
                            for el in data
                        ],
                        # "mic": [MINE(alpha=0.6, c=15, est="mic_approx").compute_score(el,
                        # list(data[0]).mic()) for el in list(data[1:])]}
                        "mic": [None for el in data]
                    },
                    "category": toolkit_obj.category
                }

            # numeric transformation
            elif toolkit_obj.category == 2:
                inn = 0
                while inn in nan_index:
                    inn = inn + 1
                # output formats are inconsistent; check whether the data is 2-D (yes => 1, no => 0)
                flag_shape = 1 if isinstance(labels[inn], list) else 0

                result_be = labels if flag_shape else np.array(labels).reshape(
                    [-1, 1]).tolist()

                data = list(zip(*args[0]))
                result = list(zip(*result_be))

                # the two tables used to be merged here; no longer needed
                # merge_data = list(zip(*(data + result)))
                if len(result) == len(fields[0]):
                    lab_fields = [
                        str(fields[0][i]) + "_New_Col"
                        for i in range(len(result))
                    ]
                else:
                    lab_fields = [
                        str(fields[0][0]) + "_New_Col_" + str(i)
                        for i in range(len(result))
                    ]

                # merge_fields = fields[0] + lab_fields

                flag_str1 = isinstance(args[0][inn][0], str)
                flag_str2 = isinstance(result_be[inn][0], str)
                bar1 = []
                bar2 = []
                for el in fields[0]:
                    indx = fields[0].index(el)
                    raw_d = data[indx]

                    if not flag_str1 and len(set(raw_d)) > 5:
                        bar1_tmp = visualization_service.freq_hist(raw_d)
                    else:
                        seta = set(raw_d)
                        x_domain = [el for el in seta]
                        y_domain = [raw_d.count(el) for el in seta]
                        bar1_tmp = {'x_domain': x_domain, 'y_domain': y_domain}
                    bar1_tmp.update({"field": el, "title": "数据分布直方图(栏位转换前)"})
                    bar1.append(bar1_tmp)

                for el in lab_fields:
                    indx = lab_fields.index(el)
                    raw_re = result[indx]

                    if not flag_str2 and len(set(raw_re)) > 5:
                        bar2_tmp = visualization_service.freq_hist(raw_re)
                    else:
                        seta = set(raw_re)
                        x_domain = [el for el in seta]
                        y_domain = [raw_re.count(el) for el in seta]
                        bar2_tmp = {'x_domain': x_domain, 'y_domain': y_domain}
                    bar2_tmp.update({"field": el, "title": "数据分布直方图(栏位转换后)"})
                    bar2.append(bar2_tmp)

                json = {
                    "category": toolkit_obj.category,
                    "table1": {
                        "title": "原始数据",
                        "field": fields[0],
                        "data": [dict(zip(fields[0], arr)) for arr in args[0]]
                    },
                    "table2": {
                        "title": "转换后数据",
                        "field": lab_fields,
                        "data":
                        [dict(zip(lab_fields, arr)) for arr in result_be]
                    },
                    "bar1": bar1,
                    "bar2": bar2
                }

            # dimensionality reduction
            elif toolkit_obj.category == 3:
                flag = toolkit_obj.parameter_spec["data"]["type"][
                    "key"] == "transfer_box"
                data = list(zip(*args[0]))

                if flag:
                    data.append(args[1])
                lab = list(zip(*labels))
                lab_fields = ["New Col" + str(i) for i in range(len(lab))]
                var1 = [np.var(da) for da in data]
                var2 = [np.var(da) for da in lab]
                merge_fields = fields[0] + fields[1] if fields[1] else \
                    fields[0]
                x_domain = merge_fields + ["_empty"] + lab_fields
                y_domain = var1 + [0] + var2

                temp = var1[:-1] if flag else var1
                json = {
                    "table1": {
                        "X_fields":
                        fields[0],
                        "Y_fields":
                        fields[1],
                        "data": [
                            dict(zip(merge_fields, arr))
                            for arr in list(zip(*data))
                        ]
                    },
                    "table2": {
                        "data": [dict(zip(lab_fields, arr)) for arr in labels],
                        "fields": lab_fields
                    },
                    "bar": {
                        "x_domain": x_domain,
                        "y_domain": y_domain
                    },
                    "pie1": [{
                        "name": fields[0][i],
                        "value": temp[i]
                    } for i in range(len(temp))],
                    "pie2": [{
                        "name": lab_fields[i],
                        "value": var2[i]
                    } for i in range(len(var2))],
                    "general_info":
                    gen_info,
                    "category":
                    toolkit_obj.category
                }

            else:
                json = {}

            # update a job
            job_business.end_job(job_obj)

            if result_spec["if_reserved"]:
                # create result sds for toolkit
                sds_name = '%s_%s_result' % (toolkit_obj['name'],
                                             job_obj['id'])
                result_sds_obj = staging_data_set_business.add(sds_name,
                                                               'des',
                                                               project_obj,
                                                               job=job_obj,
                                                               type='result')
                logger_service.save_result(
                    result_sds_obj,
                    **{"result": json_utility.convert_to_json(results)})
                logger_service.save_result(result_sds_obj,
                                           **{"visualization": json})
                return {
                    "visual_sds_id": str(result_sds_obj.id) if json else None,
                    "result": results
                }

            return {"result": results}
Code Example #13
File: job_service.py Project: zjn0224/mo
def list_by_project_id(project_id):
    project = project_business.get_by_id(project_id)
    return job_business.get_by_project(project)
Code Example #14
File: keras_seq.py Project: zjn0224/mo
def keras_seq(conf, input, **kw):
    """
    a general implementation of sequential model of keras
    :param conf: config dict
    :return:
    """
    result_sds = kw.pop('result_sds', None)
    project_id = kw.pop('project_id', None)
    job_id = kw.pop('job_id', None)
    project = project_business.get_by_id(project_id)
    ow = ownership_business.get_ownership_by_owned_item(project, 'project')
    user_ID = ow.user.user_ID
    print('conf')
    print(conf)
    result_dir = kw.pop('result_dir', None)
    if result_sds is None:
        raise RuntimeError('no result sds id passed to model')
    if project_id is None:
        raise RuntimeError('no project id passed to model')

    with graph.as_default():
        model = Sequential()

        ls = conf['layers']
        comp = conf['compile']
        f = conf['fit']
        e = conf['evaluate']
        x_train = input['x_tr']
        y_train = input['y_tr']
        # note: validation and test both reuse the x_te/y_te split here
        x_val = input['x_te']
        y_val = input['y_te']
        x_test = input['x_te']
        y_test = input['y_te']

        training_logger = logger_service.TrainingLogger(f['args']['epochs'],
                                                        project_id,
                                                        job_id,
                                                        user_ID,
                                                        result_sds)

        # TODO add validator
        # op = comp['optimizer']

        # loop to add layers
        for layer_conf in ls:
            # get the layer class from keras by name
            layer_class = getattr(layers, layer_conf['name'])
            # add layer
            model.add(layer_class(**layer_conf['args']))

        # optimiser
        # sgd_class = getattr(optimizers, op['name'])
        # sgd = sgd_class(**op['args'])

        # define the metrics
        # compile
        model.compile(**comp['args'])

        # callback to save metrics
        batch_print_callback = LambdaCallback(
            on_epoch_begin=lambda epoch, logs:
                training_logger.log_epoch_begin(epoch, logs),
            on_epoch_end=lambda epoch, logs:
                training_logger.log_epoch_end(epoch, logs),
            on_batch_end=lambda batch, logs:
                training_logger.log_batch_end(batch, logs))

        # checkpoint to save best weight
        best_checkpoint = MyModelCheckpoint(
            os.path.abspath(os.path.join(result_dir, 'best.hdf5')),
            save_weights_only=True,
            verbose=1, save_best_only=True)
        # checkpoint to save latest weight
        general_checkpoint = MyModelCheckpoint(
            os.path.abspath(os.path.join(result_dir, 'latest.hdf5')),
            save_weights_only=True,
            verbose=1)

        # training
        history = model.fit(x_train, y_train,
                            validation_data=(x_val, y_val),
                            callbacks=[batch_print_callback, best_checkpoint,
                                       general_checkpoint],
                            verbose=0,
                            **f['args'])

        # testing
        score = model.evaluate(x_test, y_test, **e['args'])
        # weights = model.get_weights()
        config = model.get_config()
        logger_service.log_train_end(result_sds,
                                     model_config=config,
                                     score=score,
                                     history=history.history)
        keras_saved_model.save_model(result_dir, model)
        return {'score': score, 'history': history.history}
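A plausible conf for keras_seq, assuming standard Keras layer names and compile/fit/evaluate arguments; the exact values are illustrative only:

conf = {
    'layers': [
        {'name': 'Dense', 'args': {'units': 64, 'activation': 'relu',
                                   'input_shape': (20,)}},
        {'name': 'Dense', 'args': {'units': 1, 'activation': 'sigmoid'}},
    ],
    'compile': {'args': {'loss': 'binary_crossentropy',
                         'optimizer': 'rmsprop',
                         'metrics': ['accuracy']}},
    'fit': {'args': {'epochs': 10, 'batch_size': 32}},
    'evaluate': {'args': {'batch_size': 32}},
}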
Code Example #15
File: model_service.py Project: zjn0224/mo
def model_to_code(conf, project_id, data_source_id, model_id, job_obj,
                  **kwargs):
    """
    run model by model_id and the parameter config

    :param conf:
    :param project_id:
    :param data_source_id:
    :param model_id:
    :param kwargs:
    :return:
    """
    file_id = kwargs.get('file_id')
    staging_data_set_obj = None
    if data_source_id:
        staging_data_set_obj = \
            staging_data_set_business.get_by_id(data_source_id)
    project_obj = project_business.get_by_id(project_id)
    file_dict = {'file': ObjectId(file_id)} if file_id else {}
    model_obj = model_business.get_by_model_id(model_id)

    run_args = {
        "conf": conf,
        "project_id": project_id,
        "data_source_id": data_source_id,
        "model_id": model_id,
        "kwargs": kwargs
    }

    # # create model job
    # job_obj = job_business.add_model_job(model_obj, staging_data_set_obj,
    #                                      project_obj, params=conf,
    #                                      run_args=run_args,
    #                                      **file_dict)

    job_obj = job_business.update_job_by_id(job_obj.id, model=model_obj,
                                            staging_data_set=staging_data_set_obj,
                                            project=project_obj, params=conf,
                                            run_args=run_args, status=100)
    job_id = str(job_obj.id)

    # model_obj = model_business.get_by_model_id(model_id)
    f = getattr(models, model_obj.to_code_function)

    if model_obj['category'] == 0:
        # keras nn
        head_str = manage_supervised_input_to_str(conf, data_source_id,
                                                  **kwargs)
        return job_service.run_code(conf, project_id, data_source_id,
                                    model_obj, f, job_id, head_str)
    elif model_obj['category'] == ModelType['unstructured']:
        # input from folder
        head_str = manage_unstructured_to_str(conf, data_source_id,
                                              **kwargs)
        return job_service.run_code(conf, project_id, None,
                                    model_obj, f, job_id, head_str,
                                    file_id=data_source_id)

    elif model_obj['category'] == ModelType['advanced']:
        # no input
        return job_service.run_code(conf, project_id, None,
                                    model_obj, f, job_id, '',
                                    file_id=None)
    else:
        # custom models
        head_str = ''
        head_str += 'import logging\n'
        head_str += 'import numpy as np\n'
        head_str += 'import pandas as pd\n'
        head_str += 'import tensorflow as tf\n'
        head_str += 'from tensorflow.python.framework import constant_op\n'
        head_str += 'from tensorflow.python.framework import dtypes\n'
        head_str += 'from tensorflow.contrib.learn.python.learn import metric_spec\n'
        head_str += 'from server3.lib import models\n'
        head_str += 'from server3.lib.models.modified_tf_file.monitors import ValidationMonitor\n'
        head_str += 'from server3.business import staging_data_set_business\n'
        head_str += 'from server3.business import staging_data_business\n'
        head_str += 'from server3.service import staging_data_service\n'
        head_str += "from server3.service import job_service\n"
        head_str += 'from server3.service.model_service import ' \
                    'split_categorical_and_continuous\n'
        head_str += 'from server3.service.custom_log_handler ' \
                    'import MetricsHandler\n'
        head_str += 'model_fn = models.%s\n' % model_obj.entry_function
        head_str += "data_source_id = '%s'\n" % data_source_id
        head_str += "model_name = '%s'\n" % model_obj.name
        head_str += "kwargs = %s\n" % kwargs
        fit = conf.get('fit', None)
        if model_obj['category'] == 1:
            data_fields = fit.get('data_fields', [[], []])
            head_str += 'data_fields = %s\n' % data_fields
            head_str += inspect.getsource(
                model_input_manager_custom_supervised)
            head_str += "input_dict = model_input_manager_custom_supervised(" \
                        "data_fields, data_source_id, model_name, **kwargs)\n"
        elif model_obj['category'] == 2:
            x_cols = fit.get('data_fields', [])
            head_str += "x_cols = %s\n" % x_cols
            head_str += inspect.getsource(model_input_manager_unsupervised)
            head_str += "input_dict = model_input_manager_unsupervised(x_cols, " \
                        "data_source_id, model_name)\n"
        return job_service.run_code(conf, project_id, data_source_id,
                                    model_obj, f, job_id, head_str)
Code Example #16
File: model_service.py Project: zjn0224/mo
def run_model(conf, project_id, data_source_id, model_id, job_id, **kwargs):
    """
    run model by model_id and the parameter config

    :param conf:
    :param project_id:
    :param data_source_id:
    :param model_id:
    :param job_id:
    :param kwargs:
    :return:
    """
    model = model_business.get_by_model_id(model_id)
    project = project_business.get_by_id(project_id)
    ownership = ownership_business.get_ownership_by_owned_item(project,
                                                               'project')
    result_dir = os.path.join(user_directory, ownership.user.user_ID,
                              project.name, job_id)

    # import model function
    if model['category'] == ModelType['neural_network']:
        # keras nn
        f = getattr(models, model.entry_function)

        input_dict = manage_nn_input(conf, data_source_id, **kwargs)
        return job_service.run_code(conf, project_id, data_source_id,
                                    model, f, job_id, input_dict,
                                    result_dir=result_dir)
    elif model['category'] == ModelType['unstructured']:
        # input from folder
        f = getattr(models, model.entry_function)
        input_dict = model_input_manager_unstructured(conf, data_source_id,
                                                      **kwargs)
        return job_service.run_code(conf, project_id, None,
                                    model, f, job_id, input_dict,
                                    file_id=data_source_id,
                                    result_dir=result_dir)
    elif model['category'] == ModelType['hyperopt']:
        f = getattr(models, model.entry_function)
        fit = conf.get('fit', None)
        data_fields = fit.get('data_fields', [[], []])
        input_dict = model_input_manager_custom_supervised(data_fields,
                                                           data_source_id,
                                                           model.name,
                                                           **kwargs)
        return job_service.run_code(conf, project_id, data_source_id,
                                    model, f, job_id, input_dict,
                                    result_dir=result_dir)
    else:
        # custom models
        f = models.custom_model
        model_fn = getattr(models, model.entry_function)
        fit = conf.get('fit', None)
        if model['category'] == ModelType['custom_supervised']:
            data_fields = fit.get('data_fields', [[], []])
            input_dict = model_input_manager_custom_supervised(data_fields,
                                                               data_source_id,
                                                               model.name,
                                                               **kwargs)
            return job_service.run_code(conf, project_id, data_source_id,
                                        model, f, job_id, model_fn, input_dict,
                                        result_dir=result_dir)
        if model['category'] == ModelType['unsupervised']:
            x_cols = fit.get('data_fields', [])
            input_dict = model_input_manager_unsupervised(x_cols,
                                                          data_source_id,
                                                          model.name,
                                                          **kwargs)
            return job_service.run_code(conf, project_id, data_source_id,
                                        model, f, job_id, model_fn, input_dict,
                                        result_dir=result_dir)

        # NOTE: unreachable -- ModelType['hyperopt'] is already handled by
        # the elif branch above, so this block is dead code
        if model['category'] == ModelType['hyperopt']:
            data_fields = fit.get('data_fields', [[], []])
            input_dict = model_input_manager_custom_supervised(data_fields,
                                                               data_source_id,
                                                               model.name,
                                                               **kwargs)
            return job_service.run_code(conf, project_id, data_source_id,
                                        model, f, job_id, model_fn, input_dict,
                                        result_dir=result_dir)
Code Example #17
def start_project_playground(project_id):
    # generate the project volume path
    project = project_business.get_by_id(project_id)
    user_ID = ownership_business.get_owner(project, 'project').user_ID
    volume_dir = os.path.join(USER_DIR, user_ID, project.name, 'volume/')
    if not os.path.exists(volume_dir):
        os.makedirs(volume_dir)
    abs_volume_dir = os.path.abspath(volume_dir)

    deploy_name = project_id + '-jupyter'
    port = port_for.select_random(ports=set(range(30000, 32767)))
    # port = network_utility.get_free_port_with_range(30000, 32767)
    kube_json = {
        "apiVersion": "apps/v1beta1",
        "kind": "Deployment",
        "metadata": {
            "name": deploy_name
        },
        "spec": {
            "template": {
                "metadata": {
                    "labels": {
                        "app": project_id
                    }
                },
                "spec": {
                    # "securityContext": {
                    #     "runAsUser": 1001,
                    # },
                    "containers": [
                        {
                            "name": project_id,
                            "image": "10.52.14.192/gzyw/jupyter_app",
                            "imagePullPolicy": "IfNotPresent",
                            "ports": [{
                                "containerPort": 8888,
                                # "hostPort": port
                            }],
                            "stdin": True,
                            "command": ['python'],
                            "args": ["-m", "notebook", "--no-browser",
                                     "--allow-root",
                                     "--ip=0.0.0.0",
                                     "--NotebookApp.allow_origin=*",
                                     "--NotebookApp.disable_check_xsrf=True",
                                     "--NotebookApp.token=''",
                                     "--NotebookApp.iopub_data_rate_limit=10000000000"],
                            "volumeMounts": [{
                                "mountPath": "/home/root/work/volume",
                                "name": project_id + "-volume"
                            }]
                        }
                    ],
                    "volumes": [{
                        "name": project_id + "-volume",
                        "hostPath": {"path": abs_volume_dir},
                    }]
                },
            },
        }
    }
    service_json = {
        "kind": "Service",
        "apiVersion": "v1",
        "metadata": {
            "name": "my-" + project_id + "-service"
        },
        "spec": {
            "type": "NodePort",
            "ports": [
                {
                    "port": 8888,
                    "nodePort": port
                }
            ],
            "selector": {
                "app": project_id
            }
        }
    }
    # import json
    # from server3.utility import file_utils
    # file_utils.write_to_filepath(json.dumps(kube_json), './jupyter_app.json')
    # return
    api = kube_service.deployment_api
    s_api = kube_service.service_api
    api.create_namespaced_deployment(body=kube_json,
                                     namespace=NAMESPACE)
    replicas = api.read_namespaced_deployment_status(
        deploy_name, NAMESPACE).status.available_replicas
    # wait until deployment is available
    while replicas is None or replicas < 1:
        replicas = api.read_namespaced_deployment_status(
            deploy_name, NAMESPACE).status.available_replicas
    # FIXME one second sleep to wait for container ready
    import time
    time.sleep(1)
    s_api.create_namespaced_service(body=service_json, namespace=NAMESPACE)
    time.sleep(1)
    return port
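A usage sketch: the returned NodePort is exposed on every cluster node, so the notebook should be reachable at any node's address (NODE_IP is a hypothetical placeholder):

port = start_project_playground('<project-oid>')
notebook_url = 'http://%s:%s' % (NODE_IP, port)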
Code Example #18
File: served_model_service.py Project: zjn0224/mo
def first_deploy(user_ID, job_id, name, description, input_info, output_info,
                 examples, server, input_type, model_name, projectId,
                 is_private,
                 **optional):
    """
    :param user_ID:
    :param job_id:
    :param name:
    :param description:
    :param input_info:
    :param output_info:
    :param examples:
    :param server:
    :param input_type:
    :param model_name:
    :param is_private:
    :param optional:
    :return:
    """
    job = job_business.get_by_job_id(job_id)
    job_info = job.to_mongo()
    project = project_business.get_by_id(projectId)
    related_fields = project.related_fields
    related_tasks = project.related_tasks
    tags = project.tags

    # if not deployed do the deployment
    try:
        served_model_business.get_by_job(job)
    except DoesNotExist:
        model_type = job.model.category
        if model_type == ModelType['neural_network'] \
                or model_type == ModelType['unstructured']:
            export_path, version = model_service.export(job_id, user_ID)
        else:
            result_sds = staging_data_set_business.get_by_job_id(job_id)
            saved_model_path_array = result_sds.saved_model_path.split('/')
            version = saved_model_path_array.pop()
            export_path = '/'.join(saved_model_path_array)

        cwd = os.getcwd()
        deploy_name = job_id + '-serving'
        service_name = "my-" + job_id + "-service"
        port = port_for.select_random(ports=set(range(30000, 32767)))
        export_path = "/home/root/work/user_directory" + \
                      export_path.split("/user_directory", 1)[1]
        # export_path = "/home/root/work/user_directory" + export_path.split("/user_directory", 1)[1]
        kube_json = {
            "apiVersion": "apps/v1beta1",
            "kind": "Deployment",
            "metadata": {
                "name": deploy_name
            },
            "spec": {
                "template": {
                    "metadata": {
                        "labels": {
                            "app": job_id
                        }
                    },
                    "spec": {
                        "containers": [
                            {
                                "name": job_id,
                                "image": "10.52.14.192/gzyw/serving_app",
                                "imagePullPolicy": "IfNotPresent",
                                "ports": [{
                                    "containerPort": 9000,
                                }],
                                "stdin": True,
                                "command": ['tensorflow_model_server'],
                                "args": ['--enable_batching',
                                         '--port={port}'.format(
                                             port=SERVING_PORT),
                                         '--model_name={name}'.format(
                                             name=model_name),
                                         '--model_base_path={export_path}'.format(
                                             export_path=export_path)],
                                "volumeMounts": [
                                    {
                                        "mountPath": "/home/root/work/user_directory",
                                        "name": "nfsvol"
                                    },
                                ]
                            }
                        ],
                        "volumes": [
                            {
                                "name": "nfsvol",
                                "persistentVolumeClaim": {
                                    "claimName": "nfs-pvc"
                                }
                            },
                        ]
                    },
                },
            }
        }
        service_json = {
            "kind": "Service",
            "apiVersion": "v1",
            "metadata": {
                "name": service_name
            },
            "spec": {
                "type": "NodePort",
                "ports": [
                    {
                        "port": 9000,
                        "nodePort": port
                    }
                ],
                "selector": {
                    "app": job_id
                }
            }
        }
        # import json
        # from server3.utility import file_utils
        # file_utils.write_to_filepath(json.dumps(kube_json), './jupyter_app.json')
        # return
        api = kube_service.deployment_api
        s_api = kube_service.service_api
        resp = api.create_namespaced_deployment(body=kube_json,
                                                namespace=NAMESPACE)
        s_api.create_namespaced_service(body=service_json, namespace=NAMESPACE)
        # tf_model_server = './tensorflow_serving/model_servers/tensorflow_model_server'
        # p = subprocess.Popen([
        #     tf_model_server,
        #     '--enable_batching',
        #     '--port={port}'.format(port=SERVING_PORT),
        #     '--model_name={name}'.format(name=name),
        #     '--model_base_path={export_path}'.format(export_path=export_path)
        # ], start_new_session=True)
        # add a served model entity
        server = server.replace('9000', str(port))

        data_fields = job_info['params']['fit']['data_fields']

        job_info['staging_data_set'] = job['staging_data_set'][
            'name'] if job['staging_data_set'] else None
        job_info['staging_data_set_id'] = job['staging_data_set'][
            'id'] if job['staging_data_set'] else None

        staging_data_demo = staging_data_service.get_first_one_by_staging_data_set_id(
            job_info['staging_data_set_id'])
        one_input_data_demo = []
        for each_feature in job_info['params']['fit']['data_fields'][0]:
            one_input_data_demo.append(staging_data_demo[each_feature])
        input_data_demo_string = '[' + ",".join(
            str(x) for x in one_input_data_demo) + ']'
        input_data_demo_string = '[' + input_data_demo_string + ',' + input_data_demo_string + ']'

        return first_save_to_db(user_ID, name, description, input_info,
                                output_info,
                                examples, version,
                                deploy_name, server,
                                input_type, export_path, job,
                                job_id, model_name,
                                related_fields,
                                related_tasks, tags, is_private, data_fields,
                                input_data_demo_string,
                                service_name, projectId,
                                **optional)
Code Example #19
def fork(project_id, new_user_ID):
    """
    fork project
    :param project_id:
    :param new_user_ID:
    :return:
    """
    # get project
    project = project_business.get_by_id(project_id)

    # get ownership, and check privacy
    ownership = ownership_business.get_ownership_by_owned_item(
        project, 'project')
    if ownership.private is True:
        raise NameError('forked project is private, fork failed')
    if ownership.user.user_ID == new_user_ID:
        raise NameError('you are forking your own project')
    # get user object
    user = UserBusiness.get_by_user_ID(new_user_ID)
    # copy and save project
    project_cp = project_business.copy(project)
    # create ownership relation
    ownership_business.add(user, True, project=project_cp)

    # copy staging data sets
    sds_array = staging_data_set_business.get_by_project_id(project_id, False)
    for sds in sds_array:
        staging_data_service.copy_staging_data_set(sds, project_cp)

    # copy jobs and save
    jobs = project.jobs
    jobs_cp = []
    for job in jobs:
        # get source sds
        if hasattr(job, 'staging_data_set') and job.staging_data_set:
            sds_cp = staging_data_set_business.get_by_name_and_project(
                job.staging_data_set.name, job.staging_data_set.project)
            # sds_cp = staging_data_service.copy_staging_data_set(
            #     job.staging_data_set, project_cp)
        else:
            sds_cp = None
        # copy job
        job_cp = job_business.copy_job(job, project_cp, sds_cp)
        if not job_cp:
            continue
        jobs_cp.append(job_cp)
        # copy result staging data set by job and bind to project
        try:
            # get result sds
            result_sds = staging_data_set_business.get_by_job_id(job['id'])
            # bind job to sds
            staging_data_set_business.update_job_by_name_and_project(
                result_sds.name, result_sds.project, job_cp)
            # staging_data_service.copy_staging_data_set(result_sds, project_cp,
            #                                            belonged_job=job_cp)
        except DoesNotExist:
            pass

    project_business.update_by_id(project_cp['id'], jobs=jobs_cp)
    project_cp.reload()
    return project_cp
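A hypothetical fork call; both failure modes raise NameError as shown above (ids are placeholders):

try:
    project_cp = fork('<public-project-oid>', '<new-user-id>')
except NameError as e:
    print(e)  # private project, or forking your own project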
Code Example #20
def add_staging_data_set_by_data_set_id(sds_name, sds_description, project_id,
                                        data_set_id):
    """
    Create staging_data_set and copy to staging_data by original data_set id

    :param sds_name: str
    :param sds_description: str
    :param project_id: ObjectId
    :param data_set_id: ObjectId
    :return: new staging_data_set object
    """
    # get project object
    # project = project_business.get_by_id(project_id)

    # create new staging data set
    ds_obj = data_set_business.get_by_id(data_set_id)
    ds = ds_obj.to_mongo()
    ds.pop('name')
    ds.pop('description')
    sds = staging_data_set_business.add(sds_name, sds_description, project_id,
                                        **ds)

    # update project info
    # note: related_field in data set become related_fields here
    project_business.update_items_to_list_field(
        project_id,
        tags=ds.get('tags', []),
        related_tasks=ds.get('related_tasks', []),
        related_fields=ds.get('related_field', []))

    # generate the project volume path
    project = project_business.get_by_id(project_id)
    user_ID = ownership_business.get_owner(project, 'project').user_ID
    volume_dir = os.path.join(USER_DIR, user_ID, project.name, 'volume/')
    if not os.path.exists(volume_dir):
        os.makedirs(volume_dir)

    # copy data from data(raw) to staging data
    # get all data objects by data_set id
    try:
        # copy the file instance to project volume
        if hasattr(ds_obj, 'file') and ds_obj.file:
            file = ds_obj.file
            if os.path.isdir(file.uri):
                dst = os.path.join(volume_dir, os.path.dirname(file.uri))
                # if dir exists, remove it and copytree, cause copytree will
                #  create the dir
                if os.path.exists(dst):
                    shutil.rmtree(dst)
                shutil.copytree(file.uri, dst)
            else:
                shutil.copy(file.uri, volume_dir)

        data_objects = data_business.get_by_data_set(data_set_id)
        # convert mongoengine objects to dicts
        data_objects = json_utility.me_obj_list_to_dict_list(data_objects)

        # remove data set id when import to sds
        for d in data_objects:
            d.pop('data_set')

        if data_objects:
            staging_data_business.add_many(sds, data_objects)
        return sds
    except Exception as e:
        # remove staging_data_set and staging_data
        staging_data_business.remove_by_staging_data_set_id(sds.id)
        staging_data_set_business.remove_by_id(sds.id)
        raise e
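A minimal usage sketch (placeholder ids); on any copy failure the staging data set is rolled back before the exception propagates:

sds = add_staging_data_set_by_data_set_id('raw_copy', 'staging copy of raw set',
                                          '<project-oid>', '<data-set-oid>')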