Example 1
def saveInfo(request, project_id, atom_learn_id, input_comp_id, algorithm,
             params: List[Param]):
    # __ALGORITHM_PARAMS = ALGORITHM_PARAMS
    if algorithm not in ALGORITHM_PARAMS:
        return Response.fail(ERRORS.ALGORITHM_NOT_SUPPORTED, None)
    algorithm_params = ALGORITHM_PARAMS[algorithm]
    db_params = list()
    checking_results = list()
    for param in params:
        values = param.values
        param_name = param.name
        # validate the parameter against its limits
        param_limit = algorithm_params[param_name]
        checking_result = param_checking(param_name, values, param_limit)
        if checking_result is not None:
            checking_results.append(checking_result)
        else:
            db_params.append(
                AtomLearnParam(project_id=project_id,
                               component_id=atom_learn_id,
                               param_name=param_name,
                               param_value=values))
    # at least one parameter failed validation
    if len(checking_results) > 0:
        return Response.fail(ERRORS.ALGORITHM_PARAM_ERROR, checking_results)
    AtomLearn.objects.update_or_create(project_id=project_id,
                                       component_id=atom_learn_id,
                                       defaults=dict(
                                           input_comp_id=input_comp_id,
                                           algorithm=algorithm))
    AtomLearnParam.objects.filter(project_id=project_id,
                                  component_id=atom_learn_id).delete()
    AtomLearnParam.objects.bulk_create(db_params)
    result = {"data": "保存成功", "status": True, "error": ""}
    return Response.success(result)
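
The `param_checking` helper used above is not part of this listing. Below is a minimal sketch of what such a validator might look like, assuming each entry in `ALGORITHM_PARAMS` is a dict with optional `type`, `range`, and `options` keys; those key names are illustrative, not taken from the project.

def param_checking(param_name, values, param_limit):
    """Return an error description, or None when the value is acceptable."""
    expected_type = param_limit.get("type")
    if expected_type in ("int", "float"):
        try:
            number = float(values)
        except (TypeError, ValueError):
            return {"param": param_name, "error": "not a number"}
        low, high = param_limit.get("range", (None, None))
        if low is not None and number < low:
            return {"param": param_name, "error": "below minimum %s" % low}
        if high is not None and number > high:
            return {"param": param_name, "error": "above maximum %s" % high}
    options = param_limit.get("options")
    if options and values not in options:
        return {"param": param_name, "error": "expected one of %s" % (options,)}
    return None
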
Example 2
def save_with_default(request, project_id, atom_learn_id, input_comp_id, id,
                      target, algorithm):
    """
    Save; the algorithm's advanced parameters use their default values.
    :param request:
    :param project_id:
    :param atom_learn_id:
    :param input_comp_id:
    :param id:
    :param target:
    :param algorithm:
    :return:
    """
    if algorithm not in ALGORITHM_PARAMS:
        return HttpResponse(
            Response.fail(ERRORS.ALGORITHM_NOT_SUPPORTED, None).to_json())
    AtomLearn.objects.update_or_create(project_id=project_id,
                                       component_id=atom_learn_id,
                                       defaults=dict(
                                           input_comp_id=input_comp_id,
                                           feature_id=id,
                                           feature_target=target,
                                           algorithm=algorithm))
    default_params = ALGORITHM_PARAMS[algorithm]
    params = list()
    for param in default_params:
        params.append(
            AtomLearnParam(project_id=project_id,
                           component_id=atom_learn_id,
                           param_name=param,
                           param_value=str(default_params[param]['default'])))
    AtomLearnParam.objects.filter(project_id=project_id,
                                  component_id=atom_learn_id).delete()
    AtomLearnParam.objects.bulk_create(params)
    return HttpResponse(Response.success().to_json())
Example 3
def downLoadReportZip(request):
    project_id = request.GET.get('project_id')
    component_id = request.GET.get('component_id')
    error_msg = "参数缺失"
    if project_id is None or project_id == "":
        return Response.fail(error_msg)
    if component_id is None or component_id == "":
        # no component specified: zip every report file in the project
        zipfiles = "_".join([project_id, getDate()])
        file_objs = fileList(project_id)
    else:
        zipfiles = "_".join([project_id, component_id, getDate()])
        file_objs = fileList(project_id, component_id)

    report_dir = getDir(project_id, component_id)
    utilities = ZipUtilities()
    for file_obj in file_objs:
        tmp_dl_path = os.path.join(report_dir, file_obj)
        utilities.toZip(tmp_dl_path, zipfiles)
    # utilities.close()
    response = StreamingHttpResponse(utilities.zip_file, content_type='application/zip')
    response['Content-Disposition'] = 'attachment;filename="{0}"'.format(zipfiles+".zip")

    return response
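
`ZipUtilities` is a project helper that does not appear in this listing. A minimal in-memory sketch built on the standard `zipfile` and `io` modules is shown below; it assumes `toZip(path, folder_name)` adds a file (or a directory tree) under `folder_name` and that `zip_file` is the buffer handed to `StreamingHttpResponse`.

import io
import os
import zipfile


class ZipUtilities:
    """In-memory zip builder; a sketch, not the project's actual implementation."""

    def __init__(self):
        self.zip_file = io.BytesIO()
        self._archive = zipfile.ZipFile(self.zip_file, "w", zipfile.ZIP_DEFLATED)

    def toZip(self, path, folder_name):
        # Store a single file, or walk a directory, under `folder_name`.
        if os.path.isfile(path):
            self._archive.write(path, os.path.join(folder_name, os.path.basename(path)))
        else:
            for root, _dirs, files in os.walk(path):
                for name in files:
                    full = os.path.join(root, name)
                    arcname = os.path.join(folder_name, os.path.relpath(full, path))
                    self._archive.write(full, arcname)

    def close(self):
        # Finish the archive and rewind the buffer so it can be streamed.
        self._archive.close()
        self.zip_file.seek(0)
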
Example 4
def update(request, project_id, component_id, field_types: List[FieldType]):
    # update the field metadata
    db_field_types = []
    for field in field_types:
        db_field_types.append(field.to_db_type(project_id, component_id))
    # check the data types
    response = None
    # field_types = None  # type: dict[str,FieldType]
    try:
        # save the field types
        for db_field_type in db_field_types:
            field = db_field_type.field
            field_type = db_field_type.field_type
            selected = db_field_type.selected
            date_format = db_field_type.date_format
            CsvReaderInfotype.objects.filter(project_id=project_id,
                                             component_id=component_id,
                                             field=field).update(
                                                 field_type=field_type,
                                                 date_format=date_format,
                                                 selected=selected,
                                             )

        response = Response.success({
            "status": True,
            "data": "修改成功",
            "error": None
        })
        return response
    except UnicodeDecodeError as e:
        response = Response.fail(ERRORS.CSV_UTF8_ERROR, None)
        return response
Example 5
def upload(request, file, user_pk):
    """
    file upload
    :param request:
    :param file:
    :param user_pk:
    :return:
    """
    if file.size > MY_DATA_LOCAL_FILE_MAX_SIZE_IN_BYTE:
        return Response.fail(ERRORS.MY_DATA_UPLOAD_SIZE_EXCEED)

    file_name = md5(user_pk, datetime.now())

    # saving the file
    file_saving_path = os.path.join(MY_DATA_LOCAL_FILE_TMP_DIR, file_name)
    with open(file_saving_path, 'wb') as destination:
        if file.multiple_chunks():
            for chunk in file.chunks():
                destination.write(chunk)
        else:
            destination.write(file.read())

    # record the data
    LocalFile(file_name=file_name, status=STATUS.UPLOADED).save()
    return Response.success(file_name)
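
The `md5(user_pk, datetime.now())` call relies on another project helper that is not shown. A plausible sketch, assuming it simply hashes the user key together with the upload time to produce a unique temporary file name:

import hashlib


def md5(user_pk, moment):
    # Hash the user key and the upload timestamp into a hex file name.
    raw = "{0}-{1}".format(user_pk, moment.isoformat())
    return hashlib.md5(raw.encode("utf-8")).hexdigest()
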
Example 6
def save(request, project_id, component_id, atom_learn_id, input_comp_id):
    atom_learn = AtomLearn.objects.filter(project_id=project_id, component_id=atom_learn_id)
    if len(atom_learn) == 0:
        return HttpResponse(Response.fail(ERRORS.ATOM_LEARN_NOT_CONFIGURED, None).to_json())
    atom_learn = atom_learn[0]
    assert isinstance(atom_learn, AtomLearn)
    learn_input_type = extract_component_type(atom_learn.input_comp_id)
    act_input_type = extract_component_type(input_comp_id)
    feature_id = atom_learn.feature_id

    fields = []
    if act_input_type == COMPONENTS.HIVE_READER:
        fields = IOFieldType.objects.filter(project_id=project_id, component_id=input_comp_id,
                                            field__in=[feature_id])
    elif act_input_type == COMPONENTS.ROBOTX_SPARK:
        fields = list(IOFieldType.objects.raw(robotx_field_in_query.format(
            project_id=project_id,
            component_id=input_comp_id,
            id=feature_id,
            target=''
        )))
    elif act_input_type == COMPONENTS.FEATURE_COMBINE:
        fields = list(IOFieldType.objects.raw(combine_field_in_query.format(
            project_id=project_id,
            component_id=input_comp_id,
            id=feature_id,
            target=''
        )))
    if len(fields) != 1:
        return HttpResponse(Response.fail(ERRORS.INPUT_NOT_SAME_AS_LEARN, None).to_json())
    AtomAct.objects.filter(project_id=project_id, component_id=component_id).delete()
    AtomAct(project_id=project_id, component_id=component_id, atom_learn_id=atom_learn_id,
            input_comp_id=input_comp_id).save()
    if learn_input_type != act_input_type:
        return HttpResponse(Response.success(ERRORS.COMPONENT_NOT_SAME_AS_LEARN).to_json())
    return HttpResponse(Response.success().to_json())
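
`extract_component_type` is not shown either. Other examples in this listing test component ids with `startswith(COMPONENTS.X)`, which suggests ids begin with their type name; the separator used below is an assumption for illustration only.

def extract_component_type(component_id):
    # Component ids appear to start with the type name, e.g. "HiveReader_3";
    # the "_" separator is assumed, not confirmed by the source.
    return component_id.split("_")[0]
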
Example 7
def current_execution(request, project_id):
    execution = CurrentExecution.objects.filter(project_id=project_id)
    if len(execution) == 0:
        return HttpResponse(Response.success().to_json())
    execution = execution[0]  # type: CurrentExecution
    if execution.current_execution is None:
        task_status_list = list()
        tasks = Task.objects.order_by('record_time').filter(
            project_id=project_id)
        for task in tasks:
            assert isinstance(task, Task)
            component_id = task.component_id
            task_status = task.task_status
            detail = task.detail
            has_log = task.has_log
            application_id = task.application_id
            error_code = task.error_code
            tracking_url = task.tracking_url
            start_time = task.start_time
            end_time = task.end_time
            task_status_list.append(
                dict(component_id=component_id,
                     task_status=task_status,
                     error_code=error_code,
                     application_id=application_id,
                     tracking_url=tracking_url,
                     detail=detail,
                     has_log=has_log,
                     start_time=start_time,
                     end_time=end_time))
        return HttpResponse(Response.success(task_status_list).to_json())
    return HttpResponse(
        Response.success(execution.current_execution).to_json())
Example 8
def search_table(request, filename, index, page_num):
    rows = MyData.objects.filter(file_name__icontains=filename) \
        .order_by('creat_time').reverse() \
        [(int(index) - 1) * int(page_num):int(index) * int(page_num)]
    sum_data = MyData.objects.filter(file_name__icontains=filename)
    # total number of pages, rounded up
    sum_index_page = int(math.ceil(len(sum_data) / float(page_num)))
    sum_index_x = dict()
    sum_index_x['sum_index'] = sum_index_page
    if len(rows) == 0:
        return HttpResponse(Response.success().to_json())
    json_data = []
    for row_obj in rows:
        result = dict()  # temp store one jsonObject
        result['file_name'] = row_obj.file_name
        result['field_num'] = row_obj.field_num
        result['file_size'] = row_obj.file_size
        result['creat_time'] = row_obj.creat_time
        result['creat_user'] = row_obj.creat_user
        json_data.append(result)
    json_data.append(sum_index_x)
    return HttpResponse(Response.success(json_data).to_json())
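
The page-count arithmetic above (divide, then branch on the remainder) is equivalent to rounding up; a small sketch of the same computation, assuming `page_size` is positive:

import math


def total_pages(total_rows, page_size):
    # e.g. 25 rows with a page size of 10 -> 3 pages
    return int(math.ceil(total_rows / float(page_size)))
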
Example 9
def execution_status(request, project_id, task_id):
    execution = Execution.objects.filter(project_id=project_id,
                                         task_id=task_id)
    if len(execution) == 0:
        return HttpResponse(Response.fail(ERRORS.NO_SUCH_TASK).to_json())
    execution = execution[0]
    tasks = Task.objects.order_by('record_time').filter(project_id=project_id,
                                                        task_id=task_id)
    query = ExecutionQuery(execution.status, execution.start_time,
                           execution.end_time)
    for task in tasks:
        assert isinstance(task, Task)
        component_id = task.component_id
        task_status = task.task_status
        detail = task.detail
        has_log = task.has_log
        application_id = task.application_id
        error_code = task.error_code
        tracking_url = task.tracking_url
        start_time = task.start_time
        end_time = task.end_time
        query.add_detail(component_id, task_status, detail, error_code,
                         application_id, tracking_url, has_log, start_time,
                         end_time)
    if execution.status != ExecutionStatus.RUNNING:
        CurrentExecution.objects.filter(project_id=project_id).update(
            current_execution=None)
    return HttpResponse(Response.success(query).to_json())
Example 10
def delete(request, project_id, component_id):
    input_type = extract_component_type(component_id)
    if input_type == COMPONENTS.CSV_READER:
        CsvReaderInfo.objects.filter(project_id=project_id,
                                     component_id=component_id).delete()
        CsvReaderInfotype.objects.filter(project_id=project_id,
                                         component_id=component_id).delete()
    elif input_type == COMPONENTS.ATOM_ACT:
        AtomActModel.objects.filter(project_id=project_id,
                                    component_id=component_id).delete()
    elif input_type == COMPONENTS.ATOM_LEARN:
        AtomLearnModel.objects.filter(project_id=project_id,
                                      component_id=component_id).delete()
        AtomLearnParam.objects.filter(project_id=project_id,
                                      component_id=component_id).delete()
    elif input_type == COMPONENTS.ATOM_TEST:
        AtomTestModel.objects.filter(project_id=project_id,
                                     component_id=component_id).delete()
    elif input_type == COMPONENTS.ATOM_EXPLORE:
        AtomExploreModel.objects.filter(project_id=project_id,
                                        component_id=component_id).delete()
        AtomExploreParam.objects.filter(project_id=project_id,
                                        component_id=component_id).delete()
    elif input_type == COMPONENTS.ROBOTX:
        Container.objects.filter(project_id=project_id,
                                 component_id=component_id).delete()
        Relation.objects.filter(project_id=project_id,
                                component_id=component_id).delete()
    else:
        return Response.fail(ERRORS.CSV_TYPE_ERROR, None)
    return Response.success()
Example 11
def queryLog(request, project_id, component_id, task_id):

    tasksLogs = Task.objects.filter(project_id=project_id,
                                    component_id=component_id,
                                    task_id=task_id)
    if len(tasksLogs) == 0:
        return Response.success()
    params = list()
    for tasksLog in tasksLogs:
        params.append(
            dict(
                component_id=component_id,
                project_id=project_id,
                task_id=task_id,
                component_type=tasksLog.component_type,
                error_code=tasksLog.error_code,
                application_id=tasksLog.application_id,
                detail=tasksLog.detail,
                has_log=tasksLog.has_log,
                task_status=tasksLog.task_status,
                relies=tasksLog.relies,
                submit_time=tasksLog.submit_time,
                record_time=tasksLog.record_time,
                start_time=tasksLog.start_time,
                end_time=tasksLog.end_time,
            ))

    return Response.success(params)
Example 12
def save(request, project_id, component_id, atom_act_id, input_comp_id,
         feature_id, feature_target):
    if feature_target == "" or feature_id == "":
        return Response.fail(ERRORS.PARAMS_NOT_IS_NULL)
    if check_target(project_id, input_comp_id, feature_target):
        return Response.fail(ERRORS.TARGET_FIELD_SELECT_ERROR)
    atom_acts = AtomAct.objects.filter(project_id=project_id,
                                       component_id=atom_act_id)
    if len(atom_acts) == 0:
        return Response.fail(ERRORS.ATOM_ACT_NOT_CONFIGURED, None)
    atom_act = atom_acts[0]
    assert isinstance(atom_act, AtomAct)

    # learn_input_type = extract_component_type(atom_act.input_comp_id)
    # test_input_type = extract_component_type(input_comp_id)
    # feature_id = atom_act.feature_id
    # feature_target = atom_act.feature_target

    csv_readers = CsvReaderInfo.objects.filter(project_id=project_id,
                                               component_id=input_comp_id)
    if len(csv_readers) == 0:
        return Response.fail(ERRORS.COMPONENT_NOT_EXIST, None)
    csv_reader = csv_readers[0]
    assert isinstance(csv_reader, CsvReaderInfo)

    AtomTest.objects.filter(project_id=project_id,
                            component_id=component_id).delete()
    AtomTest(project_id=project_id,
             component_id=component_id,
             atom_act_id=atom_act_id,
             input_comp_id=input_comp_id,
             feature_id=feature_id,
             feature_target=feature_target).save()
    return Response.success('')
Example 13
def stop_all(request, project_id):
    current = CurrentExecution.objects.filter(
        project_id=project_id)  # type:list[CurrentExecution]
    if len(current) == 0:
        return Response.success("CURRENT_PROJECT_IS_NOT_EXECUTING")
    task_id = current[0].current_execution
    execution = Execution.objects.filter(project_id=project_id,
                                         task_id=task_id)
    if execution[0].status != ExecutionStatus.RUNNING:
        return Response.success("CURRENT_PROJECT_IS_NOT_EXECUTING")
    tasks = Task.objects.filter(
        project_id=project_id,
        task_id=task_id,
        task_status__in=[apps.PENDING, apps.SUBMITTED,
                         apps.RUNNING])  # type: list[Task]
    running_without_app = list()
    for task in tasks:
        if task.task_status == apps.PENDING:
            task.relies = task.relies + 1
            task.save()
            logger.info("task[%s-%s-%s] PENDING CANCEL" %
                        (project_id, task.component_id, task_id))
        elif task.task_status == apps.SUBMITTED:
            celery_app.control.revoke(task.celery_id, terminate=True)
            Task.objects.filter(
                project_id=project_id,
                task_id=task_id,
                task_status=apps.SUBMITTED).update(task_status=apps.KILLED)
            logger.info("task[%s-%s-%s] SUBMMITED REVOKE" %
                        (project_id, task.component_id, task_id))
        elif task.task_status == apps.RUNNING:
            # if task.application_id is not None:
            celery_app.control.revoke(task.celery_id, terminate=True)
            logger.info("task[%s-%s-%s] SUBMMITED REVOKE" %
                        (project_id, task.component_id, task_id))
            Task.objects.filter(
                project_id=project_id,
                task_id=task_id,
                task_status=apps.RUNNING).update(task_status=apps.KILLED)
            # else:
            #     running_without_app.append(task.component_id)
    while len(running_without_app) != 0:
        time.sleep(3)
        tasks = Task.objects.filter(project_id=project_id,
                                    task_id=task_id,
                                    task_status=apps.RUNNING,
                                    component_id__in=running_without_app)
        if len(tasks) == 0:
            break
        running_without_app = list()
        for task in tasks:
            celery_app.control.revoke(task.celery_id, terminate=True)
            logger.info("task[%s-%s-%s] SUBMMITED REVOKE" %
                        (project_id, task.component_id, task_id))
    # CurrentExecution.objects.filter(project_id=project_id).delete()
    Execution.objects.filter(project_id=project_id, task_id=task_id).update(
        status=apps.KILLED, end_time=datetime.datetime.now(), task_count=0)

    return Response.success()
Example 14
def load(request, project_id, atom_learn_id, input_comp_id):
    atom_learn_db = AtomLearn.objects.filter(project_id=project_id,
                                             component_id=atom_learn_id)
    if len(atom_learn_db) == 0:
        # component was just created; nothing to load yet
        return HttpResponse(Response.success().to_json())
    data_changed = HttpResponse(Response.success("changed").to_json())
    atom_learn = atom_learn_db[0]
    # check whether input_comp_id is unchanged
    if atom_learn.input_comp_id != input_comp_id:
        atom_learn_db.delete()
        return data_changed
    # TODO: check whether id and target are among the fields; robotx and the self-defined feature combine are still missing
    fields = list()
    if input_comp_id.startswith(COMPONENTS.HIVE_READER):
        # hive reader
        # does the hive reader output contain both of these fields?
        fields = IOFieldType.objects.filter(
            project_id=project_id,
            component_id=input_comp_id,
            field__in=[atom_learn.feature_id, atom_learn.feature_target])
    elif input_comp_id.startswith(COMPONENTS.ROBOTX_SPARK):
        # RobotXSpark
        fields = list(
            IOFieldType.objects.raw(
                robotx_field_in_query.format(
                    project_id=project_id,
                    component_id=input_comp_id,
                    id=atom_learn.feature_id,
                    target=atom_learn.feature_target)))
    elif input_comp_id.startswith(COMPONENTS.FEATURE_COMBINE):
        # feature combine
        fields = list(
            IOFieldType.objects.raw(
                combine_field_in_query.format(
                    project_id=project_id,
                    component_id=input_comp_id,
                    id=atom_learn.feature_id,
                    target=atom_learn.feature_target)))
    # id/target are not among the fields
    if len(fields) != 2:
        atom_learn_db.delete()
        return data_changed

    # checks passed; return the data needed to initialize the component
    algorithm_params = ALGORITHM_PARAMS[atom_learn.algorithm]
    atom_learn_params = AtomLearnParam.objects.filter(
        project_id=project_id, component_id=atom_learn_id)
    params = list()
    for atom_learn_param in atom_learn_params:
        algorithm_param = copy.copy(
            algorithm_params[atom_learn_param.param_name])
        algorithm_param['value'] = atom_learn_param.param_value
        params.append(algorithm_param)
    result = dict(id=atom_learn.feature_id,
                  target=atom_learn.feature_target,
                  algorithm=atom_learn.algorithm,
                  params=params)
    return HttpResponse(Response.success(result).to_json())
Example 15
def load_xml(request, project_id, component_id):
    config_path = os.path.join(WORKING_DIRECTORY, project_id, component_id, "RobotXSpark.xml")
    if not os.path.exists(config_path):
        return HttpResponse(Response.success('').to_json())

    with open(config_path, 'r') as f:
        xml = "".join(f.readlines())
        return HttpResponse(Response.success(xml).to_json())
Example 16
def load_info(request, project_id, component_id):
    objs = CsvReaderInfo.objects.filter(project_id=project_id,
                                        component_id=component_id)
    if len(objs) == 0:
        response = Response.fail(ERRORS.COMPONENT_NOT_EXIST, None)
        return response
    response = Response.success(objs[0].magic_name)
    return response
Example 17
def load(request, project_id):
    config_path = os.path.join(WORKING_DIRECTORY, project_id, "config.xml")
    if not os.path.exists(config_path):
        return Response.success('')

    with open(config_path, 'r') as f:
        xml = "".join(f.readlines())
        return Response.success(xml)
Example 18
def get_log(request, project_id, component_id):
    task = Task.objects.filter(project_id=project_id,
                               component_id=component_id)
    if len(task) == 0:
        return Response.fail(ERRORS.NO_SUCH_TASK)
    task = task[0]
    if not task.has_log:
        return Response.fail(ERRORS.TASK_HAS_NO_LOG)
    return Response.success(Component.fetch_log(project_id, component_id))
Example 19
def load(request, project_id, component_id):
    db_field_types = AtomAct.objects.filter(project_id=project_id,
                                            component_id=component_id)
    if len(db_field_types) == 0:
        return Response.success()
    db_field_type = db_field_types[0]
    # return Response.fail(ERRORS.ATOM_ACT_NOT_CONFIGURED, None)
    result = dict(reason_code_nvars=db_field_type.reason_code_nvars,
                  ncores=db_field_type.ncores)
    return Response.success(result)
Example 20
def load(request, project_id, atom_learn_id, input_comp_id):
    atom_learn_db = AtomLearn.objects.filter(project_id=project_id,
                                             component_id=atom_learn_id)
    if len(atom_learn_db) == 0:
        # component was just created; nothing to load yet
        return Response.success()
    data_changed = Response.success("changed")
    atom_learn = atom_learn_db[0]
    # check whether input_comp_id is unchanged
    if atom_learn.input_comp_id != input_comp_id:
        atom_learn_db.delete()
        return data_changed
    # TODO: check whether id and target are among the fields; robotx and the self-defined feature combine are still missing
    fields = list()
    if input_comp_id.startswith(COMPONENTS.CSV_READER):
        fields = CsvReaderInfotype.objects.filter(project_id=project_id,
                                                  component_id=input_comp_id)
        # ,field__in=[atom_learn.feature_id, atom_learn.feature_target])
    elif input_comp_id.startswith(COMPONENTS.ROBOTX):
        # RobotX
        fields = list(
            CsvReaderInfotype.objects.raw(
                robotx_field_in_query.format(
                    project_id=project_id,
                    component_id=input_comp_id,
                    id=atom_learn.feature_id,
                    target=atom_learn.feature_target)))
    elif input_comp_id.startswith(COMPONENTS.ATOM_EXPLORE):
        fields = AtomLearnParam.objects.filter(project_id=project_id,
                                               component_id=atom_learn_id)

    # id/target are not among the fields
    # if len(fields) == 0:
    #     atom_learn_db.delete()
    #     return data_changed

    # checks passed; return the data needed to initialize the component
    algorithm_params = ALGORITHM_PARAMS[atom_learn.algorithm]
    atom_learn_params = AtomLearnParam.objects.filter(
        project_id=project_id, component_id=atom_learn_id)
    params = list()
    for atom_learn_param in atom_learn_params:
        if atom_learn_param.param_name in algorithm_params:
            algorithm_param = copy.copy(
                algorithm_params[atom_learn_param.param_name])
            algorithm_param['value'] = atom_learn_param.param_value
            params.append(algorithm_param)

    result = dict(
        # id=atom_learn.feature_id,
        # target=atom_learn.feature_target,
        algorithm=atom_learn.algorithm,
        params=params)

    return Response.success(result)
Example 21
def robotx_spark_key_fields(request, project_id, component_id):
    objs = Container.objects.filter(project_id=project_id,
                                    component_id=component_id)
    if len(objs) == 0:
        response = Response.fail(ERRORS.ROBOTX_NOT_CONFIGURED, None)
        return HttpResponse(response.to_json())

    container = objs[0]
    assert isinstance(container, Container)
    key_fields = container.key_fields.split(",")
    return HttpResponse(Response.success(key_fields).to_json())
Example 22
def execute(request, project_id, xml, execution_type, execute_id=None):
    execution = CurrentExecution.objects.filter(project_id=project_id)
    if len(execution) != 0:
        execution = execution[0]
        if execution.current_execution is not None:
            return HttpResponse(
                Response.fail(ERRORS.TASK_IS_EXECUTING).to_json())

    save_xml(project_id, xml)

    root = et.fromstring(xml)
    flows = root.findall("./sequenceFlow")
    topology = Toplogy()
    for flow in flows:
        start = flow.get('sourceRefs')
        end = flow.get('targetRefs')
        topology.add_line(start, end)

    topology.sort()
    # build the set of components to execute according to the execution type
    task_id = None
    try:
        if execution_type == FULL_EXECUTION:
            levels = topology.levels
            need_execute = OrderedDict()
            # collect the components that need to be executed
            for level in levels:
                for point in level:
                    if point.type in EXECUTABLE:
                        # generate the config file needed for execution
                        executor_class = eval(point.type)
                        executor = executor_class(project_id, point.id)
                        executor.need_execution(force=True)
                        need_execute[point.id] = point
            task_id = execute_components(need_execute, project_id)
        elif execution_type == CONT_EXECUTION:
            levels = topology.levels
            # record the components that need to be executed
            need_execute = get_need_execute(levels, project_id)
            task_id = execute_components(need_execute, project_id)
        elif execution_type == SING_EXECUTION:
            levels, flat_points = topology.get_previous_component(execute_id)

            need_execute = get_need_execute(levels[:-1], project_id,
                                            flat_points)
            execute_point = list(levels[-1])[0]
            need_execute[execute_id] = execute_point
            executor_class = eval(execute_point.type)
            executor = executor_class(project_id, execute_point.id)
            executor.need_execution(force=True)
            task_id = execute_components(need_execute, project_id)
    except Exception as e:
        error_code = str(e)
        return HttpResponse(Response.fail(error_code, None).to_json())
    return HttpResponse(Response.success(task_id).to_json())
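
`Toplogy` (spelled as in the source) is defined elsewhere in the project. The sketch below shows the kind of Kahn-style leveling its `add_line`/`sort`/`levels` interface implies, with the caveat that the real class stores point objects carrying `id` and `type`, whereas this version keeps bare node ids.

from collections import defaultdict


class Toplogy:
    """Sketch of a DAG leveler; the real class also tracks component types."""

    def __init__(self):
        self.edges = defaultdict(set)   # start node -> set of end nodes
        self.nodes = set()
        self.levels = []

    def add_line(self, start, end):
        self.edges[start].add(end)
        self.nodes.update((start, end))

    def sort(self):
        # Kahn-style leveling: repeatedly peel off every node whose
        # predecessors have all been placed.
        remaining = {node: 0 for node in self.nodes}
        for ends in self.edges.values():
            for end in ends:
                remaining[end] += 1
        placed = set()
        while len(placed) < len(self.nodes):
            level = [n for n in self.nodes
                     if n not in placed and remaining[n] == 0]
            if not level:
                raise ValueError("cycle detected in component graph")
            for node in level:
                placed.add(node)
                for end in self.edges[node]:
                    remaining[end] -= 1
            self.levels.append(level)
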
Example 23
def structure(request, table_name):
    # check that table_name is not empty
    check = VALIDATE.not_null_validate(table_name, 'table_name')
    if check is not None:
        response = Response.fail(ERRORS.PARAMETER_VALUE_ERROR, check)
        return HttpResponse(response.to_json())

    result = py4j_common_hive_util('checkExist', table_name)
    if isinstance(result, HttpResponse):
        return result
    if not result:
        return HttpResponse(
            Response.fail(ERRORS.HIVE_TABLE_NOT_EXIST, None).to_json())

    result = py4j_common_hive_util('describeAndSample', table_name)
    result = list(result)
    result_trans = list()
    # map database types to modeling types:
    # 1. unsupported data types are marked ignore=True
    # 2. for date types, record the minimum selectable granularity (for the
    #    frontend) in the date_format field
    for field_desc in result:
        field = field_desc.getName()
        database_type_trans = field_desc.getType()
        ignore = True
        field_type = None
        date_format = None
        date_size = None
        if database_type_trans in DATABASE_MAPPING:
            ignore = False
            field_type = DATABASE_MAPPING[database_type_trans]

            sample_data = field_desc.getSampleData()
            if field_type == FIELDTYPE.FACTOR:
                if sample_data is not None:
                    sample_data = list(sample_data)
                    date_, size_ = is_date(sample_data)
                    if date_:
                        date_format = size_
                        date_size = size_
                        field_type = FIELDTYPE.DATE

            if database_type_trans == 'TIMESTAMP':
                date_format = 'second'
                date_size = 'second'
            elif database_type_trans == 'DATE':
                date_format = 'day'
                date_size = 'day'
        struct = StructureClass(field, field_type, database_type_trans,
                                date_format, date_size, ignore)
        result_trans.append(struct)
    # result_trans.sort(key=lambda x: x.field)
    response = Response.success(result_trans)
    return HttpResponse(response.to_json())
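
`is_date` samples a column's values and guesses whether they are dates, returning a flag and a granularity. A rough sketch of such a heuristic is shown below; the format strings and the 'second'/'day' labels are assumptions chosen to match the granularities used above.

from datetime import datetime

_DATE_FORMATS = [("%Y-%m-%d %H:%M:%S", "second"), ("%Y-%m-%d", "day")]


def is_date(sample_data):
    """Return (True, granularity) if every non-empty sample parses as a date."""
    granularity = None
    for value in sample_data:
        if value in (None, ""):
            continue
        for fmt, size in _DATE_FORMATS:
            try:
                datetime.strptime(str(value), fmt)
                granularity = granularity or size
                break
            except ValueError:
                continue
        else:
            # no format matched this sample value
            return False, None
    return granularity is not None, granularity
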
Example 24
def save_relation(request, project_id, component_id, robotx_spark_id,
                  self_defined_feature_id, connections: List[Connection]):
    # check the robotx component
    objs = Container.objects.filter(project_id=project_id,
                                    component_id=robotx_spark_id)
    if len(objs) == 0:
        response = Response.fail(ERRORS.ROBOTX_NOT_CONFIGURED, None)
        return HttpResponse(response.to_json())

    connection_of_robotx = set()
    connection_of_self_defined = set()
    feature_combine_relations = list()
    for connection in connections:
        connection_of_robotx.add(connection.robotx_field)
        connection_of_self_defined.add(connection.self_defined_field)
        feature_combine_relations.append(
            FeatureCombineRelation(
                project_id=project_id,
                component_id=component_id,
                robotx_field=connection.robotx_field,
                self_defined_field=connection.self_defined_field))

    # check that the connection fields exist in robotx
    container = objs[0]
    table_name = container.table_name
    key_fields = set(container.key_fields.split(","))
    if not key_fields.issuperset(connection_of_robotx):
        response = Response.fail(ERRORS.FIELD_NOT_FOUND_IN_ROBOTX, None)
        return HttpResponse(response.to_json())

    # check the self_defined_feature fields
    objs = SelfDefinedFeatureType.objects.filter(
        field__in=connection_of_self_defined,
        project_id=project_id,
        component_id=self_defined_feature_id)
    if len(objs) != len(connection_of_self_defined):
        response = Response.fail(ERRORS.FIELD_NOT_FOUND_IN_SELF_DEFINED, None)
        return HttpResponse(response.to_json())

    # checks passed; save
    FeatureCombine.objects.filter(project_id=project_id,
                                  component_id=component_id).delete()
    FeatureCombine(project_id=project_id,
                   component_id=component_id,
                   robotx_table_name=table_name,
                   robotx_spark_id=robotx_spark_id,
                   self_defined_feature_id=self_defined_feature_id).save()

    FeatureCombineRelation.objects.filter(project_id=project_id,
                                          component_id=component_id).delete()
    FeatureCombineRelation.objects.bulk_create(feature_combine_relations)
    return HttpResponse(Response.success().to_json())
Example 25
def container_fields(request, project_id, component_id):
    query_sql = container_fields_sql.format(project_id=project_id,
                                            component_id=component_id)
    field_types = list(IOFieldType.objects.raw(query_sql))
    if len(field_types) == 0:
        return HttpResponse(Response.success().to_json())
    structures = []
    for field_type in field_types:
        structure = StructureClass(field_type.field, field_type.field_type,
                                   field_type.database_type,
                                   field_type.date_format,
                                   field_type.date_size, field_type.ignore)
        structures.append(structure)
    return HttpResponse(Response.success(structures).to_json())
Example 26
def view_table(request, project_id, component_id):
    robotx_task = Task.objects.filter(project_id=project_id, component_id=component_id)
    if len(robotx_task) == 0:
        return HttpResponse(Response.fail(ERRORS.ROBOTX_NOT_SUCCESS).to_json())
    robotx_task = robotx_task[0]
    assert isinstance(robotx_task, Task)
    if robotx_task.task_status != TASK_STATUS.SUCCEEDED:
        return HttpResponse(Response.fail(ERRORS.ROBOTX_NOT_SUCCESS).to_json())

    result_table = RobotXSpark.output_table(project_id, component_id)
    result = py4j_common_hive_util('viewTable', result_table, 10)
    if isinstance(result, HttpResponse):
        return result
    return HttpResponse(Response.success(
        [dict(name=k, value=list(v)) for k, v in result.items()]).to_json())
Example 27
def load_hive_reader(request, project_id, component_id):
    component_id_validate = VALIDATE.component_id_validate(component_id, COMPONENTS.HIVE_READER)
    if component_id_validate is not None:
        return HttpResponse(component_id_validate.to_json())
    hive_readers = HiveReader.objects.filter(project_id=project_id, component_id=component_id)
    if len(hive_readers) == 0:
        # component does not exist yet
        response = Response.success()
        return HttpResponse(response.to_json())
    hive_reader = hive_readers[0]
    response = Response.success(dict(
        table_name=hive_reader.table_name,
        logic_name=hive_reader.logic_name
    ))
    return HttpResponse(response.to_json())
Example 28
def kill_task(request, project_id, task_id):
    executions = Execution.objects.filter(project_id=project_id,
                                          task_id=task_id)
    if len(executions) == 0:
        return Response.success("NO_SUCH_TASK")
    else:
        try:
            celery_app.control.revoke(task_id, terminate=True)
            execution = executions[0]
            execution.task_count = 0
            execution.status = apps.KILLED
            execution.save()
        except RuntimeError:
            return Response.success(apps.FAILED)
    return Response.success(apps.KILLED)
Example 29
def saveInfo(request, project_id, component_id, magic_name, file_name):
    try:
        # save the component
        CsvReaderInfo.objects.filter(project_id=project_id,
                                     component_id=component_id).delete()
        CsvReaderInfo(project_id=project_id,
                      component_id=component_id,
                      magic_name=magic_name,
                      file_name=file_name).save()
        result = {"data": {}, "status": True, "error": ""}
        response = Response.success(result)
        return response
    except UnicodeDecodeError as e:
        response = Response.fail(ERRORS.CSV_SAVE_ERROR, None)
        return response
Example 30
def preview(request, project_id, component_id):
    reader = HiveReader.objects.filter(project_id=project_id,
                                       component_id=component_id)
    if len(reader) == 0:
        return HttpResponse(
            Response.fail(ERRORS.HIVE_TABLE_NOT_EXIST).to_json())
    reader = reader[0]
    table_name = reader.table_name
    result = py4j_common_hive_util('viewTable', table_name, 10)
    if isinstance(result, HttpResponse):
        return result
    return HttpResponse(
        Response.success([
            dict(name=k, value=list(v)) for k, v in result.items()
        ]).to_json())