예제 #1
0
    def execute(self,
                env: MasonEnvironment,
                response: Response,
                dry_run: bool = True) -> OperatorResponse:
        """Resolve this operator's module and either validate or run it.

        Args:
            env: Environment handed to ``self.module`` and to the operator's
                ``run``.
            response: Response that collects info and error messages.
            dry_run: When true, only report that the operator is valid and
                do not run it.

        Returns:
            The ``OperatorResponse`` produced by the operator, or one wrapping
            ``response`` for the dry-run and error paths.
        """
        # The whole body stays inside the try so a ModuleNotFoundError raised
        # while resolving *or* running the module is reported the same way.
        try:
            definition = self.module(env)

            if not isinstance(definition, OperatorDefinition):
                response.add_error(
                    f"Module does not contain a valid OperatorDefinition. See /examples for sample operator implementations. \nMessage: {definition.reason}"
                )
                return OperatorResponse(response)

            if dry_run:
                response.add_info(
                    f"Valid Operator: {self.namespace}:{self.command} with specified parameters."
                )
                return OperatorResponse(response)

            return definition.run(env, self.config, self.parameters, response)
        except ModuleNotFoundError as missing:
            response.add_error(f"Module Not Found: {missing}")
            return OperatorResponse(response)
예제 #2
0
    def register_dag(self, schedule_name: str, valid_dag: ValidDag,
                     schedule: Optional[Schedule], response: Response):
        """Register a single-step TableInfer dag as a glue crawler schedule.

        Args:
            schedule_name: Name to register the schedule (crawler) under.
            valid_dag: Dag to register; only a dag with exactly one step whose
                operator type is ``"TableInfer"`` is accepted.
            schedule: Optional schedule forwarded to ``register_schedule``.
            response: Response that collects info and error messages.

        Returns:
            A ``(schedule_name, response, None)`` tuple.
        """
        steps = valid_dag.valid_steps
        #  Short-circuit for glue crawler definition since glue as a scheduler is only well defined for Table Infer Operator
        if len(steps) == 1 and steps[0].operator.type_name() == "TableInfer":
            op = steps[0].operator
            params = op.parameters
            db_name = params.get_required("database_name")

            storage_engine = op.config.storage()
            if isinstance(storage_engine, StorageClient):
                storage_path = storage_engine.path(
                    params.get_required("storage_path"))
                # Only register once a storage path has been resolved.
                # Previously this call ran unconditionally and raised
                # UnboundLocalError when the storage client was invalid.
                response = self.register_schedule(db_name, storage_path,
                                                  schedule_name, schedule,
                                                  response)
            else:
                # add_error is used as an in-place mutation everywhere else in
                # this code, so do not rebind ``response`` to its return value.
                response.add_error(
                    f"Attempted to register_dag for invalid client: {storage_engine.reason}"
                )
        else:
            response.add_error(
                "Glue Scheduler only defined for TableInfer type which registers a glue crawler"
            )

        return (schedule_name, response, None)
예제 #3
0
    def run(
        self, env: MasonEnvironment, response: Optional[Response] = None
    ) -> OperatorResponse:
        """Register this workflow's dag with the configured scheduler and trigger it.

        Args:
            env: Environment forwarded to the scheduler's ``trigger_schedule``.
            response: Response to accumulate messages on. A fresh ``Response``
                is created per call when omitted; the previous ``Response()``
                default was evaluated once and shared between calls.

        Returns:
            An ``OperatorResponse`` wrapping the accumulated response.
        """
        if response is None:
            response = Response()

        scheduler = self.config.scheduler()
        if isinstance(scheduler, SchedulerClient):
            response.add_info(
                f"Registering workflow dag {self.name} with {scheduler.client.name()}."
            )
            schedule_id, response, client_dag = scheduler.register_dag(
                self.name, self.dag, self.schedule, response)
            if not response.errored():
                response.add_info(f"Registered schedule {schedule_id}")
            # TODO: FIX
            # if client_dag and output_path:
            #     with tempfile.NamedTemporaryFile("w", delete=False) as f:
            #         json = client_dag.to_json()
            #         response.add_info(f"Saving client dag to {output_path}")
            #         f.write(json)
            #         f.close()
            #         response = self.config.storage.client.save_to(f.name, output_path, response)
            if self.schedule:
                # The workflow is triggered immediately even though it has a
                # schedule attached, so warn the caller about it.
                response.add_warning(
                    f"Triggering workflow off schedule: {self.schedule.definition}"
                )

            response.add_info(f"Triggering schedule: {schedule_id}")
            response = scheduler.trigger_schedule(schedule_id, response, env)
        else:
            response.add_error("Scheduler client not defined")

        return OperatorResponse(response)
예제 #4
0
    def register_schedule(self, database_name: str, path: Path,
                          schedule_name: str, schedule: Optional[Schedule],
                          response: Response):
        """Create a glue crawler for ``path`` and record the outcome on ``response``.

        Args:
            database_name: Database passed to ``create_glue_crawler``.
            path: Path whose ``clean_path_str()`` is handed to the crawler.
            schedule_name: Name used for the crawler.
            schedule: Optional schedule forwarded to ``create_glue_crawler``.
            response: Response that collects the raw result, messages and status.

        Returns:
            The same ``response`` after it has been updated.
        """
        crawler_result = self.create_glue_crawler(
            database=database_name,
            name=schedule_name,
            role=self.aws_role_arn or "",
            path=path.clean_path_str(),
            schedule=schedule)
        response.add_response(crawler_result)

        error, status, message = self.parse_response(crawler_result)

        # Known, non-fatal crawler states are reported as warnings.
        if error == "AlreadyExistsException":
            response.add_warning(
                f"Table crawler {schedule_name} already exists. Skipping creation."
            )
            response.set_status(201)
            return response

        if error == "CrawlerRunningException":
            response.add_warning(
                f"Table crawler {schedule_name} is already refreshing.")
            response.set_status(202)
            return response

        if 200 <= status < 300:
            response.add_info(f"Created table crawler {schedule_name}.")
            response.set_status(201)
            return response

        # Anything else is surfaced as an error with the parsed status.
        response.add_error(message)
        response.set_status(status)
        return response
예제 #5
0
    def save_to(self, inpath: Path, outpath: Path, response: Response):
        """Upload ``inpath`` to ``outpath`` through this client.

        Any exception raised during the upload is recorded on ``response`` as
        two error entries (a summary line and the exception message) instead
        of being propagated.

        Returns:
            The same ``response``.
        """
        try:
            uploader = self.client()
            uploader.upload(inpath.path_str, outpath.path_str)
        except Exception as failure:
            summary = f"Error saving {inpath} to {outpath.path_str}"
            response.add_error(summary)
            response.add_error(message(failure))

        return response
예제 #6
0
    def to_response(self, response: Response) -> Response:
        """Fold every invalid table, then this object's own error, into ``response``.

        Returns:
            The response returned by the last ``to_response`` call in the
            chain, with ``self.error`` added when it is truthy.
        """
        for invalid_table in self.invalid_tables:
            response = invalid_table.to_response(response)

        own_error = self.error
        if not own_error:
            return response

        response.add_error(own_error)
        return response
예제 #7
0
파일: malformed.py 프로젝트: kyprifog/mason
 def save(
     self,
     state_store: MasonStateStore,
     overwrite: bool = False,
     response: Optional[Response] = None) -> Response:
     """Report this resource's message as an error instead of saving anything.

     Args:
         state_store: Unused in this implementation.
         overwrite: Unused in this implementation.
         response: Response to record the error on. A fresh ``Response`` is
             created per call when omitted; the previous ``Response()``
             default was evaluated once and shared between calls.

     Returns:
         The response, with ``self.get_message()`` added as an error when it
         is truthy.
     """
     if response is None:
         response = Response()

     message = self.get_message()
     if message:
         response.add_error(message)
     return response
예제 #8
0
 def execute(self,
             env: MasonEnvironment,
             response: Response,
             dry_run: bool = True,
             run_now: bool = False,
             schedule_name: Optional[str] = None) -> Response:
     """Record that this operator is invalid; nothing is executed.

     Only ``response`` is used; the remaining parameters are ignored by this
     implementation.

     Returns:
         ``response`` with the failure reason added and its status set to 400.
     """
     failure = f"Invalid Operator.  Reason:  {self.reason}"
     response.add_error(failure)
     response.set_status(400)
     return response
예제 #9
0
    def trigger_schedule(self, schedule_name: str, response: Response,
                         env: MasonEnvironment) -> Response:
        """Run the dag held on ``self.dag`` as a ``WorkflowRun``.

        ``schedule_name`` is not used by this implementation.

        Returns:
            The response produced by the workflow run, or ``response`` with an
            error added when ``self.dag`` is falsy.
        """
        registered_dag = self.dag
        if not registered_dag:
            response.add_error("Dag not found.  Run 'register_dag' first.")
            return response

        return WorkflowRun(registered_dag).run(env, response)
예제 #10
0
파일: operator.py 프로젝트: kyprifog/mason
 def save(self, state_store: MasonStateStore, overwrite: bool = False, response: Optional[Response] = None) -> Response:
     """Copy this operator's source through ``state_store`` and report the outcome.

     Args:
         state_store: Store whose ``cp_source`` performs the copy.
         overwrite: Forwarded to ``cp_source``.
         response: Response to record the outcome on. A fresh ``Response`` is
             created per call when omitted; the previous ``Response()``
             default was evaluated once and shared between calls.

     Returns:
         The response, with an error for a ``FailedOperation`` result or a
         raised exception, and an info entry otherwise.
     """
     if response is None:
         response = Response()

     try:
         result = state_store.cp_source(self.source_path, "operator", self.namespace, self.command, overwrite)
         if isinstance(result, FailedOperation):
             response.add_error(f"{result.message}")
         else:
             response.add_info(result)
     except Exception as e:
         # Best-effort: a failed copy is reported on the response, not raised.
         response.add_error(f"Error copying source: {message(e)}")

     return response
예제 #11
0
    def run(self, env: MasonEnvironment, response: Response) -> Response:
        """Step through the dag until finished and collect the step responses.

        Args:
            env: Environment handed to every ``self.step`` call.
            response: Response that the executed steps' responses are merged into.

        Returns:
            The merged response; when ``self.invalid_steps`` is non-empty it
            also carries a failure message, each invalid step's reason, and
            status 400.
        """
        response.add_info(f"Running dag \n{self.dag.display()}")

        while not self.finished():
            self.step(env)

        for executed in sorted(self.executed_steps):
            response = response.merge(executed.operator_response.response)

        failed_steps = self.invalid_steps
        if len(failed_steps) == 0:
            return response

        response.add_error("Workflow failed")
        for failed in failed_steps:
            response.add_error(failed.reason)
        response.set_status(400)
        return response
예제 #12
0
    def trigger_schedule_for_table(self, table_name: str, database_name: str,
                                   response: Response):
        """Trigger the crawler recorded in a table's ``created_by`` field.

        Args:
            table_name: Table to look up.
            database_name: Database the table is looked up in.
            response: See the review note below.

        Returns:
            The response returned by ``get_table``, updated with the outcome.
        """
        # NOTE(review): the incoming ``response`` is replaced by the one that
        # get_table returns, so anything already recorded on the argument is
        # not carried forward from here -- confirm this is intended.
        table, response = self.get_table(database_name, table_name)

        if not isinstance(table, Table):
            response.add_error(f"Could not find table {table_name}")
            response.set_status(404)
            return response

        created_by = table.created_by
        creator = created_by or ""
        if "crawler:" in creator:
            self.trigger_schedule(creator.replace("crawler:", ""), response)
        else:
            response.add_error(
                f"Table not created by crawler. created_by: {created_by}")

        return response
예제 #13
0
    def delete_schedule(self, schedule_name: str,
                        response: Response) -> Response:
        """Delete the crawler named ``schedule_name`` and record the outcome.

        A ``ClientError`` from the delete call is not propagated; its
        ``response`` payload is parsed like a normal result.

        Returns:
            ``response`` with the raw result attached, plus either an info
            entry on success or the parsed status and error message.
        """
        try:
            outcome = self.client().delete_crawler(Name=schedule_name)
        except ClientError as client_error:
            outcome = client_error.response

        error, status, message = self.parse_response(outcome)
        response.add_response(outcome)

        if error == "":
            response.add_info(
                f"Schedule {schedule_name} successfully deleted.")
            return response

        response.set_status(status)
        response.add_error(message)
        return response
예제 #14
0
    def trigger_schedule(self, schedule_name: str, response: Response):
        """Start a refresh of the crawler ``schedule_name`` and record the outcome.

        Args:
            schedule_name: Name passed to ``refresh_glue_table``.
            response: Response that collects the raw result, messages and status.

        Returns:
            The same ``response`` after it has been updated.
        """
        refresh_glue_table_response = self.refresh_glue_table(schedule_name)
        error, status, message = self.parse_response(
            refresh_glue_table_response)

        response.add_response(refresh_glue_table_response)

        if error == "CrawlerRunningException":
            response.add_warning(
                f"Table crawler {schedule_name} is already refreshing.")
            response.add_data({})
            response.set_status(202)
        elif status and 200 <= status < 300:
            response.add_info(f"Refreshing Table Crawler: {schedule_name}")
            response.add_data({})
            response.set_status(201)
        else:
            # Every non-2xx outcome lands here. Previously a truthy non-2xx
            # status (e.g. 400) matched ``elif status:`` and was silently
            # dropped, leaving the response without an error or status.
            response.add_error(message)
            response.set_status(status)
        return response
예제 #15
0
파일: config.py 프로젝트: kyprifog/mason
def config(config_id: Optional[str],
           set_current: bool = False,
           log_level: Optional[str] = None,
           env: Optional[MasonEnvironment] = None,
           printer: Printer = ApiPrinter()):
    """List configs, optionally setting the session config first.

    Args:
        config_id: Config to show, or to set as current when ``set_current``
            is true.
        set_current: When true and ``config_id`` is given, call
            ``set_session_config`` before listing.
        log_level: Level passed to ``logger.set_level``.
        env: Environment to use; a new one is initialized when omitted.
        printer: Printer whose ``print_resources`` builds the response.

    Returns:
        ``with_status()`` of the response returned by the printer.
    """
    mason_env = env or MasonEnvironment().initialize()
    logger.set_level(log_level)
    response = Response()

    if set_current and config_id:
        outcome = Resources(mason_env).set_session_config(config_id)
        if not isinstance(outcome, str):
            response.add_info(f"Set session config to {config_id}")
            # After a successful switch, list every config rather than one.
            config_id = None
        else:
            # A string result is treated as the failure message.
            response.add_error(outcome)
            response.set_status(404)

    found_configs = Resources(mason_env).get_resources("config", config_id)
    # NOTE(review): ``response`` is rebound here, so the info/error/404
    # recorded in the set_current branch above does not appear to reach the
    # caller -- confirm whether the two responses should be combined.
    response = printer.print_resources(found_configs,
                                       "config",
                                       config_id,
                                       environment=mason_env)
    return response.with_status()
예제 #16
0
    def print_resources(
            self,
            resources: List[Union[Operator, Workflow, Config,
                                  MalformedResource]],
            type: Optional[str] = None,
            namespace: Optional[str] = None,
            command: Optional[str] = None,
            environment: Optional[MasonEnvironment] = None) -> Response:
        """Build a response that lists ``resources`` grouped by kind.

        Args:
            resources: Mixed list of operators, workflows, configs and
                malformed resources.
            type: Resource type, used only for the "none found" message.
            namespace: Used only for the "none found" message.
            command: Used only for the "none found" message.
            environment: When given, its state store supplies the current
                session config id passed to each config's ``to_dict``.

        Returns:
            A new ``Response``: status 404 with an error when ``resources`` is
            empty, otherwise one entry per non-empty group, with status 400
            when only malformed resources were found.
        """
        operators, workflows, configs, bad = sequence_4(
            resources, Operator, Workflow, Config, MalformedResource)
        response = Response()

        if len(resources) == 0:
            response.add_error(self.none_message(type, namespace, command))
            response.set_status(404)
            return response

        if len(operators) > 0:
            response.add("Operators", [op.to_dict() for op in operators])

        if len(configs) > 0:
            current_id: Optional[str] = None
            if environment:
                current_id = environment.state_store.get_session_config()
            response.add("Configs",
                         [cfg.to_dict(current_id) for cfg in configs])

        if len(workflows) > 0:
            response.add("Workflows", [wf.to_dict() for wf in workflows])

        if len(bad) > 0:
            response.add("Errors", [item.get_message() for item in bad])
            # Only malformed resources were found: report a bad request.
            if len(operators + configs + workflows) == 0:  # type: ignore
                response.set_status(400)

        return response
예제 #17
0
def run(resource_type: str,
        namespace: str,
        command: str,
        parameter_string: Optional[str] = None,
        param_file: Optional[str] = None,
        config_id: Optional[str] = None,
        log_level: Optional[str] = None,
        env: Optional[MasonEnvironment] = None,
        dry_run: bool = False,
        parameters: Optional[dict] = None,
        printer=ApiPrinter()):
    """Look up a resource, config and parameters, then run or dry-run the resource.

    Args:
        resource_type: Kind of resource to look up.
        namespace: Namespace of the resource.
        command: Command of the resource.
        parameter_string: Parameters as a string, forwarded to ``get_parameters``.
        param_file: Parameter file, forwarded to ``get_parameters``.
        config_id: Config to use, forwarded to ``get_best_config``.
        log_level: Level passed to ``logger.set_level``.
        env: Environment to use; a new one is initialized when omitted.
        dry_run: When true, call ``dry_run`` on the validated resource
            instead of ``run``.
        parameters: Parameters as a dict, forwarded to ``get_parameters``.
        printer: Printer whose ``print_response`` produces the return value.

    Returns:
        Whatever ``printer.print_response`` returns for the final response.
    """
    response = Response()
    environment: MasonEnvironment = env or MasonEnvironment().initialize()
    logger.set_level(log_level)
    res = base.Resources(environment)

    resource: Union[Resource, MalformedResource] = res.get_resource(
        resource_type, namespace, command)
    config: Union[Config, MalformedResource] = res.get_best_config(config_id)
    params: Union[Parameters, MalformedResource] = res.get_parameters(
        resource_type, parameter_string, param_file, parameters)

    everything_valid = (isinstance(resource, Resource)
                        and isinstance(config, Config)
                        and isinstance(params, Parameters))

    if everything_valid:
        validated = validate_resource(resource, config, params, environment)
        if dry_run:
            outcome = validated.dry_run(environment, response)
        else:
            outcome = validated.run(environment, response)
        response = outcome.to_response(response)
    elif isinstance(resource, MalformedResource):
        # Only the first malformed input is reported.
        response.add_error(f"Malformed Resource: {resource.get_message()}")
    elif isinstance(config, MalformedResource):
        response.add_error(f"Bad Config: {config.get_message()}")
    elif isinstance(params, MalformedResource):
        response.add_error(f"Bad Parameters: {params.get_message()}")

    return printer.print_response(response)
예제 #18
0
 def to_response(self, response: Response):
     """Record this object's reason and schema conflict on ``response``.

     Returns:
         ``response`` with the reason added as an error, the schema conflict's
         ``to_dict()`` added as data, and its status set to 403.
     """
     conflict_data = self.schema_conflict
     response.add_error(self.reason)
     response.add_data(conflict_data.to_dict())
     response.set_status(403)
     return response
예제 #19
0
 def _missing(response: Response, *args, **kwargs) -> Response:
     """Record an "Invalid Client" error on ``response`` and return it.

     ``self`` is not a parameter here; it is read from the enclosing scope.
     Extra positional and keyword arguments are accepted and ignored.
     """
     reason = self.reason
     response.add_error(f"Invalid Client: {reason}")
     return response
예제 #20
0
 def to_response(self, response: Response):
     """Add a "Job errored" entry to ``response`` when a reason is set.

     Returns:
         ``response``, unchanged when ``self.reason`` is falsy.
     """
     reason = self.reason
     if not reason:
         return response

     response.add_error("Job errored: " + reason)
     return response
예제 #21
0
파일: invalid.py 프로젝트: kyprifog/mason
 def dry_run(
     self, env: MasonEnvironment, response: Optional[Response] = None
 ) -> OperatorResponse:
     """Report that this resource is invalid; nothing is validated or run.

     Args:
         env: Unused in this implementation.
         response: Response to record the error on. A fresh ``Response`` is
             created per call when omitted; the previous ``Response()``
             default was evaluated once and shared between calls.

     Returns:
         An ``OperatorResponse`` wrapping the response, which carries the
         failure reason and status 400.
     """
     if response is None:
         response = Response()

     response.add_error("Invalid Resource: " + self.reason)
     response.set_status(400)
     return OperatorResponse(response)
예제 #22
0
 def to_response(self, response: Response):
     """Record this object's reason as an error with status 404.

     Returns:
         The same ``response`` after it has been updated.
     """
     reason = self.reason
     response.add_error(reason)
     response.set_status(404)
     return response
예제 #23
0
 def run(
     self, env: MasonEnvironment, response: Optional[Response] = None
 ) -> OperatorResponse:
     """Report that this operator is invalid; nothing is run.

     Args:
         env: Unused in this implementation.
         response: Response to record the error on. A fresh ``Response`` is
             created per call when omitted; the previous ``Response()``
             default was evaluated once and shared between calls.

     Returns:
         An ``OperatorResponse`` wrapping the response, which carries the
         failure reason and status 400.
     """
     if response is None:
         response = Response()

     response.add_error(f"Invalid Operator.  Reason:  {self.reason}")
     response.set_status(400)
     return OperatorResponse(response)
예제 #24
0
 def to_response(self, response: Response):
     """Record this object's reason as an error on ``response``.

     Returns:
         The same ``response`` after the error has been added.
     """
     reason = self.reason
     response.add_error(reason)
     return response