Example #1
def add_rows_append_in(target_table_id, source_table_id, **kwargs):
    """
    把两张表纵向拼起来,取target_table的fields


    :param target_table:
    :type target_table:
    :param source_table:
    :type source_table:
    :param kwargs:
    :type kwargs:
    :return:
    :rtype:
    """
    # filler value for cells that have no data
    empty_cell_filling = None
    # source table
    source_table = staging_data_business.get_by_staging_data_set_id(
        staging_data_set_id=source_table_id)

    added_dict = []

    for row in source_table:
        # copy the row, then drop the identity fields so the copy gets a new
        # _id and can be attached to the target staging data set
        new_row = {
            **row.to_mongo().to_dict(),
            # 'staging_data_set': ObjectId(target_table_id)
        }
        new_row.pop("_id")
        new_row.pop("staging_data_set")
        added_dict.append(new_row)
    staging_data_business.add_many(target_table_id, added_dict)
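
A minimal sketch of the same row-copy pattern on plain dicts (the sample rows and
field names are made up for illustration; the real function works on mongoengine
documents via to_mongo().to_dict()):

source_rows = [
    {"_id": "a1", "staging_data_set": "sds_src", "x": 1, "y": 2},
    {"_id": "a2", "staging_data_set": "sds_src", "x": 3, "y": 4},
]
copies = []
for row in source_rows:
    new_row = dict(row)
    # drop the identity fields so the copies can be re-inserted under another data set
    new_row.pop("_id")
    new_row.pop("staging_data_set")
    copies.append(new_row)
print(copies)  # [{'x': 1, 'y': 2}, {'x': 3, 'y': 4}]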
Example #2
def save_as_result(job_id, new_sds_name):
    job_obj = job_business.get_by_job_id(job_id)
    result = job_obj.result
    toolkit = job_obj.toolkit
    project_obj = job_obj.project

    sds_id = staging_data_set_business.add(
        name=new_sds_name,
        description='des',
        project=project_obj,
        # job=job_obj
    )

    # fetch the original (source) table of the job
    old_sds = StepBusiness.get_datasource(job_obj.steps)
    table = staging_data_business.get_by_staging_data_set_id(old_sds)

    table_dict = []
    for doc in table:
        row = doc.to_mongo().to_dict()
        row.pop("_id")
        row.pop("staging_data_set")
        table_dict.append(row)

    # copy the original table's rows into the new staging data set
    staging_data_business.add_many(staging_data_set=sds_id,
                                   data_array=table_dict)

    # save the toolkit result into the new staging data set
    save_result_sub(result, sds_id, toolkit)
Example #3
def get_row_col_info(sds_id):
    """
    get_row_col_info
    :param sds_id:
    :return:
    """
    sds = staging_data_business.get_by_staging_data_set_id(sds_id)
    row_n = len(sds)
    col_n = len(get_fields_with_types(sds_id))
    return {'row': row_n, 'col': col_n}
Example #4
def add_new_keys_value(sds_id, lst_dicts):
    """
        update data row by row
        :param update
        :return:
    """
    # get all rows (documents) of the staging data set
    ids = staging_data_business.get_by_staging_data_set_id(sds_id)
    for oid in ids:
        staging_data_business.update_by_id(oid.id, lst_dicts.pop(0))
Example #5
def add_new_key_value(sds_id, key, array):
    """
        update data row by row
        :param update:
        :return:
    """
    # get all rows (documents) of the staging data set
    ids = staging_data_business.get_by_staging_data_set_id(sds_id)
    for i, oid in enumerate(ids):
        # pair each row with the corresponding value from array
        query = {key: array[i]}
        staging_data_business.update_by_id(oid.id, query)
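
A small, self-contained sketch of the row-by-row pairing used by the two helpers
above, on plain lists instead of database documents (the names and values are made up):

row_ids = ["row-1", "row-2", "row-3"]   # stand-ins for the document ids
values = [0.1, 0.2, 0.3]                # one value for the new field per row
updates = [{"score": v} for v in values]
for rid, update in zip(row_ids, updates):
    print(rid, update)                  # each row gets its own update dict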
Example #6
def add_columns_append_in(target_table_id, source_table_id, index,
                          added_fields, nan_type, **kwargs):
    # target table
    target_table = staging_data_business.get_by_staging_data_set_id(
        staging_data_set_id=target_table_id)
    # table to json
    target_table_json = table_to_json(target_table)

    # json to pd
    target_table_pd = pd.read_json(json.dumps(target_table_json))

    # fetch the source table (only the index and added fields)
    source_table = staging_data_business.get_by_staging_data_set_and_fields(
        source_table_id, fields=added_fields + index, with_id=True)
    # table to json
    source_table_json = table_to_json(source_table,
                                      added_fields=added_fields,
                                      with_new=True)

    # json to pd
    source_table_pd = pd.read_json(json.dumps(source_table_json))

    # use the join columns as the index for alignment
    if len(index) != 0:
        target_table_pd = target_table_pd.set_index(index)
        source_table_pd = source_table_pd.set_index(index)

    # column-wise concat, keeping only the rows present in target_table
    result = pd.concat([target_table_pd, source_table_pd],
                       axis=1).reindex(target_table_pd.index)

    # pandas dataframe to json
    update_json = result.to_json(orient='index')
    update_json = json.loads(update_json)
    update_dict = []
    if len(index) <= 1:
        for key, value in update_json.items():
            update_dict.append({"_id": key, **value})
    else:
        for key, value in update_json.items():
            ele = {}
            key = json.loads(key)
            for k, v in zip(index, key):
                ele[k] = v
            update_dict.append({**ele, **value})
    # convert the string ids back to ObjectId before writing back to Mongo
    new_update_dict = [{
        **item, "_id": ObjectId(item["_id"]),
        "staging_data_set": ObjectId(item["staging_data_set"])
    } for item in update_dict]
    staging_data_business.update_many_with_new_fields(new_update_dict)
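
The column join above relies on index-aligned concatenation in pandas. A minimal,
runnable sketch of that alignment with made-up DataFrames (the column names here
are purely illustrative):

import pandas as pd

# two tables sharing an "id" column; only rows present in the target survive
target = pd.DataFrame({"id": [1, 2, 3], "x": [10, 20, 30]}).set_index("id")
source = pd.DataFrame({"id": [2, 3, 4], "y": [200, 300, 400]}).set_index("id")

# column-wise concat restricted to the target's index (left-join semantics)
merged = pd.concat([target, source], axis=1).reindex(target.index)
print(merged)
#      x      y
# id
# 1   10    NaN
# 2   20  200.0
# 3   30  300.0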
Example #7
def update_many_with_new_fields(raw_data, index, fields, name, sds_id):
    ids = staging_data_business.get_by_staging_data_set_id(sds_id)
    ids = list(ids)
    # find the first position in raw_data that is not an index position,
    # to use as a sample element
    inn = 0
    while inn in index:
        inn += 1

    list_dicts = []
    # check whether raw_data is a flat (one-dimensional) array of scalars
    if not isinstance(raw_data[inn], list):
        str_name = fields[0] + name if len(fields) == 1 else name
        for i in range(len(ids)):
            list_dicts.append({'_id': ids[i].id, str_name: raw_data[i]})
    else:
        length1 = len(raw_data[inn])
        length2 = len(fields)
        # decide whether the result is one-to-many or many-to-many when naming the new fields
        if length1 == length2:
            name_list = [item + '_' + name for item in fields]
        elif length2 == 1:
            name_list = [
                fields[0] + '_' + name + str(i) for i in range(length1)
            ]
        else:
            name_list = [name + str(i) for i in range(length1)]

        for i in range(len(raw_data)):
            arr = raw_data[i]
            # NaN != NaN, so this detects a missing result row
            if arr != arr:
                rows = [arr] * length1
                obj = dict(zip(name_list, rows))
            else:
                obj = dict(zip(name_list, arr))
            obj.update({'_id': ids[i].id})
            list_dicts.append(obj)

    print("list_dicts", list_dicts)
    # save list_dicts to the database
    staging_data_business.update_many_with_new_fields(list_dicts)
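
A small, self-contained sketch of the two tricks used above: NaN is the only value
that is unequal to itself, and the generated column names are zipped with each
result row (the field names and values here are made up):

import math

fields = ["a", "b"]
name = "pca"
raw_data = [[0.1, 0.2], math.nan, [0.5, 0.6]]

name_list = [item + '_' + name for item in fields]   # ['a_pca', 'b_pca']
rows = []
for arr in raw_data:
    if arr != arr:                     # NaN != NaN detects a missing result row
        arr = [arr] * len(name_list)   # spread the NaN across all new columns
    rows.append(dict(zip(name_list, arr)))
print(rows)
# [{'a_pca': 0.1, 'b_pca': 0.2}, {'a_pca': nan, 'b_pca': nan}, {'a_pca': 0.5, 'b_pca': 0.6}]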
Example #8
def get_all_jobs_of_project(project_id, categories, status=None):
    """
    get all jobs and job info of a project
    :param project_id:
    :param categories:
    :param status:
    :return:
    """
    from server3.business import job_business

    # jobs = project_business.get_by_id(project_id)['jobs']

    jobs = job_business.get_by_project(project_id).order_by('-create_time')

    history_jobs = {c: [] for c in categories}
    for job in jobs:
        # keys = history_jobs.keys()
        for key in categories:
            if status is None:
                check = job[key]
            else:
                check = job[key] and (job['status'] == status)
            if check:
                job_info = job.to_mongo()
                # model/toolkit info
                # job_info[key] = {
                #     'item_id': job[key]['id'],
                #     'name': job[key]['name'],
                #     'category': job[key]['category'],
                #     'parameter_spec': job[key]['parameter_spec'],
                #     'steps': job[key]['steps']
                # }
                job_info[key] = job[key].to_mongo()

                # source staging data set info
                job_info['staging_data_set'] = job['staging_data_set'][
                    'name'] if job['staging_data_set'] else None
                job_info['staging_data_set_id'] = job['staging_data_set'][
                    'id'] if job['staging_data_set'] else None

                # result sds info
                # mongoengine objects use .id, json/dict results use '_id'
                if key == 'model':
                    try:
                        result_sds = staging_data_set_business.get_by_job_id(
                            job['id']).to_mongo()
                        if result_sds:
                            # model results
                            job_info['results'] = result_sds
                            metrics_status = [sd.to_mongo() for sd in
                                              staging_data_business.get_by_staging_data_set_id(
                                                  result_sds['_id']).order_by(
                                                  'n')]
                            # metrics_status.sort(key=lambda x: x['n'])
                            job_info['metrics_status'] = metrics_status
                            if len(metrics_status) > 0:
                                total_steps = get_total_steps(job)
                                job_info['percent'] = \
                                    metrics_status[-1]['n'] / total_steps * 100
                            if job_info['status'] == 200:
                                job_info['percent'] = 100
                            job_info['results_staging_data_set_id'] = \
                                result_sds[
                                    '_id'] if result_sds else None
                    except DoesNotExist:
                        result_sds = None
                if job['status'] == 200 and key == 'model':
                    temp_data_fields = job_info['params']['fit']['data_fields']
                    if not isinstance(temp_data_fields[0], list):
                        job_info['params']['fit']['data_fields'] = [
                            temp_data_fields]
                    print(job_info['params']['fit']['data_fields'][0])
                # model running status info
                # if key == 'model':
                #     job_name = KUBE_NAME['model'].format(job_id=job['id'])
                #     job_info = kube_service.get_job_status(job_info, job_name)

                # fetch the served_model record from the database
                served_model_id = job_info.get('served_model')
                if served_model_id:
                    served_model = served_model_business.get_by_id(
                        served_model_id)
                    # fetch the deployment status of the model from kube
                    served_model = kube_service.get_deployment_status(
                        served_model)
                    served_model = served_model.to_mongo()

                    # fetch the first row of the data used to train the served_model
                    staging_data_demo = staging_data_service.get_first_one_by_staging_data_set_id(
                        job_info['staging_data_set_id'])
                    one_input_data_demo = []
                    for each_feature in \
                            job_info['params']['fit']['data_fields'][0]:
                        one_input_data_demo.append(
                            staging_data_demo[each_feature])
                    input_data_demo_string = '[' + ",".join(
                        str(x) for x in one_input_data_demo) + ']'
                    input_data_demo_string = '[' + input_data_demo_string + ',' + input_data_demo_string + ']'
                    print(input_data_demo_string)
                    # generate the how-to-use code snippet
                    job_info["served_model"] = served_model
                    job_info["served_model"][
                        "input_data_demo_string"] = input_data_demo_string
                    job_info = build_how_to_use_code(job_info)
                else:
                    served_model = None
                    job_info["served_model"] = served_model
                history_jobs[key].append(job_info)
                break
    return history_jobs
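
A minimal sketch of how the input_data_demo_string above is assembled from one data
row and the fit feature list (the row values and field names are made up here):

staging_data_demo = {"sepal_length": 5.1, "sepal_width": 3.5, "label": 0}
data_fields = ["sepal_length", "sepal_width"]

one_input_data_demo = [staging_data_demo[f] for f in data_fields]
input_data_demo_string = '[' + ",".join(str(x) for x in one_input_data_demo) + ']'
# duplicate the single example so the demo payload contains two rows
input_data_demo_string = '[' + input_data_demo_string + ',' + input_data_demo_string + ']'
print(input_data_demo_string)   # [[5.1,3.5],[5.1,3.5]]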