def GetExecutePipelineResults(self, request, context):
    session = Session.get(request.context.session_id)
    if session is not None:
        for pipeline_id in request.pipeline_ids:
            result = session.test_results.get(pipeline_id)
            if result is not None:
                yield result
def ExecutePipeline(self, request, context):
    """Predict step - multiple results messages returned via GRPC streaming."""
    session = Session.get(request.context.session_id)
    if session is None:
        # Unknown session: close the stream with a gRPC error instead of
        # yielding None, which would fail protobuf serialization
        # (assumes `import grpc` at module scope)
        context.set_code(grpc.StatusCode.NOT_FOUND)
        context.set_details("Unknown session id")
        return

    pipeline = session.get_pipeline(request.pipeline_id)
    if pipeline is None:
        context.set_code(grpc.StatusCode.NOT_FOUND)
        context.set_details("Unknown pipeline id")
        return

    # Proto3 string fields default to "", never None, so test for truthiness
    datafile = None
    if request.dataset_uri:
        datafile = self._create_path_from_uri(request.dataset_uri)

    # Get test data directories
    handler = GRPC_PlannerEventHandler(session)
    handler.StartExecutingPipeline(pipeline)
    session.controller.initialize_from_features(datafile,
                                                session.train_features,
                                                session.target_features,
                                                session.outputdir,
                                                view='TEST')

    # TODO: Change this to be a yield too. Save should happen within the handler.
    for result in session.controller.test(pipeline, handler):
        if result is not None:
            yield result
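# A minimal client-side sketch of consuming this stream (hedged: the stub
# class core_pb2_grpc.CoreStub and the request message PipelineExecuteRequest
# are assumptions about the generated GRPC code, not confirmed by this file):
#
#   channel = grpc.insecure_channel('localhost:50051')
#   stub = core_pb2_grpc.CoreStub(channel)
#   request = core.PipelineExecuteRequest(
#       context=core.SessionContext(session_id=session_id),
#       pipeline_id=pipeline_id,
#       dataset_uri='file:///path/to/datasetDoc.json')
#   for result in stub.ExecutePipeline(request):  # results arrive as they are produced
#       print(result.response_info)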
def ListPipelines(self, request, context):
    pipeline_ids = []
    session = Session.get(request.context.session_id)
    if session is not None:
        pipeline_ids = list(session.pipelines.keys())
    response = self._create_response("Listing pipelines")
    return core.PipelineListResult(response_info=response,
                                   pipeline_ids=pipeline_ids)
def DeletePipelines(self, request, context):
    pipeline_ids = []
    session = Session.get(request.context.session_id)
    if session is not None:
        for pipeline_id in request.delete_pipeline_ids:
            session.delete_pipeline(pipeline_id)
            pipeline_ids.append(pipeline_id)
    response = self._create_response("Deleted pipelines")
    return core.PipelineListResult(response_info=response,
                                   pipeline_ids=pipeline_ids)
def DescribeDataflow(self, request, context):
    ok = True
    response = self._create_response("Dataflow description")
    pipeline = None
    session = Session.get(request.context.session_id)
    if session is None:
        response = self._create_response("Dataflow description", code="SESSION_UNKNOWN")
        ok = False
    else:
        # Only look up the pipeline when the session exists, otherwise
        # session.get_pipeline would raise on None
        pipeline = session.get_pipeline(request.pipeline_id)
        if pipeline is None:
            response = self._create_response("Dataflow description", code="INTERNAL")
            ok = False

    modules = []
    connections = []
    if ok:
        for i, primitive in enumerate(pipeline.primitives):
            inputs = [dfext.DataflowDescription.Input(name="input_data",
                                                      type="pandas.DataFrame")]
            outputs = [dfext.DataflowDescription.Output(name="output_data",
                                                        type="pandas.DataFrame")]
            # Modeling primitives also take the labels as a second input
            if primitive.task == "Modeling":
                inputs.append(dfext.DataflowDescription.Input(name="input_labels",
                                                              type="pandas.DataFrame"))
            modules.append(dfext.DataflowDescription.Module(id=primitive.cls,
                                                            type=primitive.type,
                                                            label=primitive.name,
                                                            inputs=inputs,
                                                            outputs=outputs))
            # Chain each primitive's output to the next primitive's input
            if i > 0:
                prev_primitive = pipeline.primitives[i - 1]
                connections.append(dfext.DataflowDescription.Connection(
                    from_module_id=prev_primitive.cls,
                    from_output_name="output_data",
                    to_module_id=primitive.cls,
                    to_input_name="input_data"))
    return dfext.DataflowDescription(pipeline_id=request.pipeline_id,
                                     response_info=response,
                                     modules=modules,
                                     connections=connections)
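# For a three-primitive pipeline the loop above produces a simple linear
# dataflow. A hedged sketch of the resulting structure (the primitive names
# are illustrative, not taken from this file):
#
#   modules:     [Imputer] -> [PCA] -> [RandomForest]
#   connections: Imputer.output_data -> PCA.input_data
#                PCA.output_data     -> RandomForest.input_data
#
# plus an extra input_labels input on the Modeling primitive (RandomForest).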
def SetProblemDoc(self, request, context):
    session = Session.get(request.context.session_id)
    if session is not None:
        c = session.controller
        for update in request.updates:
            # Map the GRPC enum values onto the planner's problem enums
            if update.task_type:
                c.problem.task_type = TaskType[core.TaskType.Name(update.task_type)]
            if update.task_subtype:
                c.problem.task_subtype = TaskSubType[core.TaskSubtype.Name(update.task_subtype)]
            #if update.output_type:
            #    c.problem.output_type = core.OutputType.Name(update.output_type)
            if update.metric:
                metric = Metric[core.PerformanceMetric.Name(update.metric)]
                c.problem.set_metrics([metric])
        session.controller = c
    return self._create_response("Updated Problem Doc")
def ExportPipeline(self, request, context):
    session = Session.get(request.context.session_id)
    if session is None:
        return self._create_response("No such session", code="SESSION_UNKNOWN")
    pipeline = session.get_pipeline(request.pipeline_id)
    if pipeline is None:
        return self._create_response("Invalid pipeline id", code="INVALID_ARGUMENT")
    # Proto3 string fields default to "", so test for truthiness rather than None
    if request.pipeline_exec_uri:
        ex = session.controller.execution_helper
        ex.create_pipeline_executable(pipeline, session.config)
        exefile = self._create_path_from_uri(request.pipeline_exec_uri)
        origfile = os.path.join(session.execdir, pipeline.id)
        shutil.copy(origfile, exefile)
        os.chmod(exefile, 0o755)
        return self._create_response("Pipeline Exported")
    else:
        return self._create_response("No pipeline export uri", code="INVALID_ARGUMENT")
def GetDataflowResults(self, request, context):
    ok = True
    response = None
    pipeline = None
    session = Session.get(request.context.session_id)
    if session is None:
        response = self._create_response("Dataflow results", code="SESSION_UNKNOWN")
        ok = False
    else:
        # Only look up the pipeline when the session exists, otherwise
        # session.get_pipeline would raise on None
        pipeline = session.get_pipeline(request.pipeline_id)
        if pipeline is None:
            response = self._create_response("Dataflow results", code="INTERNAL")
            ok = False
    if ok:
        if pipeline.finished:
            for result in self._get_pipeline_results(pipeline):
                yield result
        else:
            # Stream results as they arrive until the pipeline finishes
            while not pipeline.finished:
                pipeline.waitForChanges()
                for result in self._get_pipeline_results(pipeline):
                    yield result
    else:
        yield dfext.ModuleResult(response_info=response)
def CreatePipelines(self, request, context):
    session = Session.get(request.context.session_id)
    if session is None:
        # Unknown session: close the stream with a gRPC error instead of
        # yielding None (assumes `import grpc` at module scope)
        context.set_code(grpc.StatusCode.NOT_FOUND)
        context.set_details("Unknown session id")
        return

    # Get training and target features
    train_features = []
    target_features = []
    for tfeature in request.predict_features:
        train_features.append(Feature(tfeature.resource_id, tfeature.feature_name))
    for tfeature in request.target_features:
        target_features.append(Feature(tfeature.resource_id, tfeature.feature_name))

    # Proto3 string fields default to "", never None, so test for truthiness
    datafile = None
    if request.dataset_uri:
        datafile = self._create_path_from_uri(request.dataset_uri)

    session.train_features = train_features
    session.target_features = target_features

    # Create the planning controller if not already present
    c = session.controller
    if c is None:
        c = Controller(self.libdir)
        c.initialize_from_features(datafile, train_features, target_features,
                                   session.outputdir, view='TRAIN')

    # Set problem schema from the GRPC enums (0 is the proto3 unset default)
    if request.task > 0:
        c.problem.task_type = TaskType[core.TaskType.Name(request.task)]
    if request.task_subtype > 0:
        c.problem.task_subtype = TaskSubType[core.TaskSubtype.Name(request.task_subtype)]
    #if request.output > 0:
    #    c.problem.output_type = core.OutputType.Name(request.output)

    # Load metrics
    if request.metrics:
        metrics = [Metric[core.PerformanceMetric.Name(rm)] for rm in request.metrics]
        c.problem.set_metrics(metrics)

    # Set the max pipelines cutoff (proto3 ints default to 0, never None)
    cutoff = request.max_pipelines if request.max_pipelines > 0 else None

    # Start planning / training
    session.controller = c
    if session.controller.l1_planner is None:
        session.controller.initialize_planners()
    for result in session.controller.train(GRPC_PlannerEventHandler(session),
                                           cutoff=cutoff):
        if result is not None:
            yield result
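# A hedged sketch of building the corresponding request on the TA3 side,
# using the enum and field names referenced above (the SessionContext,
# PipelineCreateRequest, and Feature message names, and the specific enum
# values, are assumptions about the core proto, not confirmed by this file):
#
#   request = core.PipelineCreateRequest(
#       context=core.SessionContext(session_id=session_id),
#       dataset_uri='file:///path/to/datasetDoc.json',
#       task=core.TaskType.Value('CLASSIFICATION'),
#       task_subtype=core.TaskSubtype.Value('MULTICLASS'),
#       metrics=[core.PerformanceMetric.Value('F1_MACRO')],
#       target_features=[core.Feature(resource_id='0', feature_name='class')],
#       max_pipelines=10)
#   for result in stub.CreatePipelines(request):
#       ...  # streamed results, one per candidate pipeline event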