# NOTE: these client helpers assume the TA3-TA2 gRPC API protobuf modules
# are available under the aliases pb_core and pb_value (the exact import
# path depends on the ta3ta2-api release in use), along with the encode_*
# helpers defined elsewhere in this package.
import datetime
import logging

logger = logging.getLogger(__name__)


def do_search(core, problem, dataset_path, time_bound=30.0,
              pipelines_limit=0, pipeline_template=None):
    """Start a solution search and collect the solutions it reports."""
    version = pb_core.DESCRIPTOR.GetOptions().Extensions[
        pb_core.protocol_version]
    search = core.SearchSolutions(pb_core.SearchSolutionsRequest(
        user_agent='ta3_stub',
        version=version,
        time_bound_search=time_bound,
        rank_solutions_limit=pipelines_limit,
        allowed_value_types=['CSV_URI'],
        problem=encode_problem_description(problem),
        template=pipeline_template,
        inputs=[pb_value.Value(dataset_uri='file://%s' % dataset_path)],
    ))
    start_time = datetime.datetime.now()
    results = core.GetSearchSolutionsResults(
        pb_core.GetSearchSolutionsResultsRequest(
            search_id=search.search_id,
        ))
    solutions = {}
    for result in results:
        if result.solution_id:
            end_time = datetime.datetime.now()
            solutions[result.solution_id] = (result.internal_score,
                                             result.scores,
                                             str(end_time - start_time))
    return str(search.search_id), solutions

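# A minimal sketch of how do_search() might be driven. The insecure
# channel, the localhost:45042 address, and the pb_core_grpc alias for the
# generated gRPC stub module are assumptions, not part of the code above.
import grpc


def _example_search(problem, dataset_path):
    channel = grpc.insecure_channel('localhost:45042')
    core = pb_core_grpc.CoreStub(channel)
    search_id, solutions = do_search(core, problem, dataset_path,
                                     time_bound=10.0)
    for solution_id, (internal_score, scores, elapsed) in solutions.items():
        logger.info("Solution %s: internal_score=%s (found after %s)",
                    solution_id, internal_score, elapsed)
    return search_id, solutions
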
def do_score(core, problem, solutions, dataset_path):
    """Score solutions with k-fold cross-validation on the given dataset."""
    metrics = []
    for metric in problem['problem']['performance_metrics']:
        metrics.append(encode_performance_metric(metric))

    for solution in solutions:
        try:
            response = core.ScoreSolution(pb_core.ScoreSolutionRequest(
                solution_id=solution,
                inputs=[pb_value.Value(
                    dataset_uri='file://%s' % dataset_path)],
                performance_metrics=metrics,
                users=[],
                configuration=pb_core.ScoringConfiguration(
                    method='K_FOLD',
                    folds=4,
                    train_test_ratio=0.75,
                    shuffle=True,
                    random_seed=0,
                ),
            ))
            results = core.GetScoreSolutionResults(
                pb_core.GetScoreSolutionResultsRequest(
                    request_id=response.request_id,
                ))
            # Drain the stream; the scores themselves are collected from
            # the search results (see do_search() above)
            for _ in results:
                pass
        except Exception:
            logger.exception("Exception during scoring %r", solution)

def do_test(core, fitted, dataset_path):
    """Run fitted solutions on a dataset and collect the exposed predictions."""
    tested = {}
    for fitted_solution in fitted.values():
        try:
            response = core.ProduceSolution(pb_core.ProduceSolutionRequest(
                fitted_solution_id=fitted_solution,
                inputs=[pb_value.Value(
                    dataset_uri='file://%s' % dataset_path)],
                expose_outputs=['outputs.0'],
                expose_value_types=['CSV_URI'],
                users=[],
            ))
            results = core.GetProduceSolutionResults(
                pb_core.GetProduceSolutionResultsRequest(
                    request_id=response.request_id,
                ))
            for result in results:
                if result.progress.state == pb_core.COMPLETED:
                    tested[fitted_solution] = \
                        result.exposed_outputs['outputs.0'].csv_uri
        except Exception:
            logger.exception("Exception testing %r", fitted_solution)
    return tested

# The following handlers are methods of the TA2's gRPC Core servicer class
# (shown unindented here); self._requests maps job ids to event queues.
def GetFitSolutionResults(self, request, context):
    """Wait for a training job to be done.
    """
    try:
        job_id = int(request.request_id, 16)
        queue = self._requests[job_id]
    except (ValueError, KeyError):
        raise error(context, grpc.StatusCode.NOT_FOUND,
                    "Unknown ID %r", request.request_id)

    for event, kwargs in queue.read():
        if not context.is_active():
            logger.info("Client closed GetFitSolutionResults stream")
            break
        if event == 'training_start':
            yield pb_core.GetFitSolutionResultsResponse(
                progress=pb_core.Progress(
                    state=pb_core.RUNNING,
                    status="Training in progress",
                ),
            )
        elif event == 'training_success':
            pipeline_id = kwargs['pipeline_id']
            storage_dir = kwargs['storage_dir']
            steps_to_expose = kwargs['steps_to_expose']
            yield pb_core.GetFitSolutionResultsResponse(
                progress=pb_core.Progress(
                    state=pb_core.COMPLETED,
                    status="Training completed",
                ),
                exposed_outputs={
                    step_id: pb_value.Value(
                        csv_uri='file://%s/fit_%s_%s.csv' % (
                            storage_dir, pipeline_id, step_id))
                    for step_id in steps_to_expose
                },
                fitted_solution_id=str(pipeline_id),
            )
            break
        elif event == 'training_error':
            status = kwargs['error_msg']
            yield pb_core.GetFitSolutionResultsResponse(
                progress=pb_core.Progress(
                    state=pb_core.ERRORED,
                    status=status,
                ),
            )
            break
        elif event == 'done_searching':
            break

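# GetFitSolutionResults() above and GetScoreSolutionResults() below only
# assume that each entry in self._requests has a read() method yielding
# (event, kwargs) pairs. A hypothetical in-memory implementation of that
# contract (not the actual class used by this server) might look like:
import queue as queue_mod


class EventQueue(object):
    """Blocking queue of (event, kwargs) pairs with a generator reader."""

    def __init__(self):
        self._queue = queue_mod.Queue()

    def write(self, event, **kwargs):
        self._queue.put((event, kwargs))

    def read(self):
        # Yield events as they arrive; the gRPC handlers break out of
        # their loops on terminal events, so this generator never needs
        # to stop on its own
        while True:
            yield self._queue.get()
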
def GetScoreSolutionResults(self, request, context):
    """Wait for a scoring job to be done.
    """
    try:
        job_id = int(request.request_id, 16)
        queue = self._requests[job_id]
    except (ValueError, KeyError):
        raise error(context, grpc.StatusCode.NOT_FOUND,
                    "Unknown ID %r", request.request_id)

    for event, kwargs in queue.read():
        if not context.is_active():
            logger.info("Client closed GetScoreSolutionResults stream")
            break
        if event == 'scoring_start':
            yield pb_core.GetScoreSolutionResultsResponse(
                progress=pb_core.Progress(
                    state=pb_core.RUNNING,
                    status="Scoring in progress",
                ),
            )
        elif event == 'scoring_success':
            pipeline_id = kwargs['pipeline_id']
            scores = self._ta2.get_pipeline_scores(pipeline_id)
            scores = [
                pb_core.Score(
                    metric=pb_problem.ProblemPerformanceMetric(
                        metric=m,
                        k=0,
                        pos_label=''),
                    value=pb_value.Value(raw=pb_value.ValueRaw(double=s)),
                )
                for m, s in scores.items()
            ]
            yield pb_core.GetScoreSolutionResultsResponse(
                progress=pb_core.Progress(
                    state=pb_core.COMPLETED,
                    status="Scoring completed",
                ),
                scores=scores,
            )
            break
        elif event == 'scoring_error':
            status = kwargs['error_msg']
            yield pb_core.GetScoreSolutionResultsResponse(
                progress=pb_core.Progress(
                    state=pb_core.ERRORED,
                    status=status,
                ),
            )
            break

# Nested helper: `self` and `session` come from the enclosing method's scope
def msg_solution(pipeline_id):
    scores = self._ta2.get_pipeline_scores(pipeline_id)
    progress = session.progress
    if scores:
        if session.metrics and session.metrics[0]['metric'].name in scores:
            metric = session.metrics[0]['metric']
            try:
                internal_score = metric.normalize(scores[metric.name])
            except Exception:
                internal_score = scores[metric.name]
                logger.warning(
                    "Problem normalizing metric, using the raw value: %.2f",
                    scores[metric.name])
        else:
            internal_score = float('nan')
        scores = [
            pb_core.Score(
                metric=pb_problem.ProblemPerformanceMetric(
                    metric=m,
                    k=0,
                    pos_label=''),
                value=pb_value.Value(raw=pb_value.ValueRaw(double=s)),
            )
            for m, s in scores.items()
        ]
        scores = [pb_core.SolutionSearchScore(scores=scores)]
        return pb_core.GetSearchSolutionsResultsResponse(
            done_ticks=progress.current,
            all_ticks=progress.total,
            progress=pb_core.Progress(
                state=pb_core.RUNNING,
                status="Solution scored",
                start=to_timestamp(session.start),
            ),
            solution_id=str(pipeline_id),
            internal_score=internal_score,
            scores=scores,
        )

def do_train(core, solutions, dataset_path):
    """Train solutions and collect the resulting fitted solution ids."""
    fitted = {}
    for solution in solutions:
        try:
            response = core.FitSolution(pb_core.FitSolutionRequest(
                solution_id=solution,
                inputs=[pb_value.Value(
                    dataset_uri='file://%s' % dataset_path)],
                expose_outputs=['outputs.0'],
                expose_value_types=['CSV_URI'],
                users=[],
            ))
            results = core.GetFitSolutionResults(
                pb_core.GetFitSolutionResultsRequest(
                    request_id=response.request_id,
                ))
            for result in results:
                if result.progress.state == pb_core.COMPLETED:
                    fitted[solution] = result.fitted_solution_id
        except Exception:
            logger.exception("Exception training %r", solution)
    return fitted

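# The client helpers above compose into the usual TA3 workflow: search for
# solutions, score them, train the survivors, then produce predictions on
# held-out data. A sketch of that flow (everything other than the do_*
# helpers is illustrative):
def _example_session(core, problem, train_path, test_path):
    search_id, solutions = do_search(core, problem, train_path)
    do_score(core, problem, solutions, train_path)
    fitted = do_train(core, solutions, train_path)
    tested = do_test(core, fitted, test_path)
    for fitted_id, predictions_uri in tested.items():
        logger.info("Fitted solution %s wrote predictions to %s",
                    fitted_id, predictions_uri)
    return tested
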
def _add_step(self, steps, step_descriptions, modules, params,
              module_to_step, mod):
    """Recursively convert a workflow module into a pipeline step."""
    if mod.id in module_to_step:
        return module_to_step[mod.id]

    # Special case: the "dataset" module
    if mod.package == 'data' and mod.name == 'dataset':
        module_to_step[mod.id] = 'inputs.0'
        return 'inputs.0'
    elif mod.package != 'd3m':
        raise ValueError("Got unknown module '%s:%s'" % (mod.package,
                                                         mod.name))

    # Recursively walk upstream modules (to get `steps` in topological
    # order), adding inputs to a dictionary in deterministic order
    inputs = {}
    for conn in sorted(mod.connections_to, key=lambda c: c.to_input_name):
        step = self._add_step(steps, step_descriptions, modules, params,
                              module_to_step, modules[conn.from_module_id])
        if step.startswith('inputs.'):
            inputs[conn.to_input_name] = step
        else:
            inputs[conn.to_input_name] = '%s.%s' % (step,
                                                    conn.from_output_name)

    klass = d3m_ta2_nyu.workflow.convert.get_class(mod.name)
    metadata = klass.metadata.query()
    metadata_items = {
        key: metadata[key]
        for key in ('id', 'version', 'python_path', 'name', 'digest')
        if key in metadata
    }

    arguments = {
        name: pb_pipeline.PrimitiveStepArgument(
            container=pb_pipeline.ContainerArgument(data=data))
        for name, data in inputs.items()
    }

    # If hyperparameters are set, export them
    step_hyperparams = {}
    if mod.id in params and 'hyperparams' in params[mod.id]:
        hyperparams = pickle.loads(params[mod.id]['hyperparams'])
        for k, v in hyperparams.items():
            step_hyperparams[k] = pb_pipeline.PrimitiveStepHyperparameter(
                value=pb_pipeline.ValueArgument(
                    data=pb_value.Value(raw=encode_raw_value(v))))

    # Create the step description
    step = pb_pipeline.PipelineDescriptionStep(
        primitive=pb_pipeline.PrimitivePipelineDescriptionStep(
            primitive=pb_primitive.Primitive(
                id=metadata_items['id'],
                version=metadata_items['version'],
                python_path=metadata_items['python_path'],
                name=metadata_items['name'],
                digest=metadata_items['digest']),
            arguments=arguments,
            outputs=[pb_pipeline.StepOutput(id='produce')],
            hyperparams=step_hyperparams,
        ))
    step_descriptions.append(
        # FIXME: the step description is currently left empty
        pb_core.StepDescription(
            primitive=pb_core.PrimitiveStepDescription()))

    step_nb = 'steps.%d' % len(steps)
    steps.append(step)
    module_to_step[mod.id] = step_nb
    return step_nb

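# _add_step() threads three shared structures (steps, step_descriptions,
# module_to_step) through the recursion so each module is converted exactly
# once. A hypothetical caller (the surrounding method and parameter names
# are illustrative, mirroring what _add_step expects):
def _workflow_to_steps(self, workflow_modules, params):
    # workflow_modules: iterable of module objects with .id, .package,
    # .name and .connections_to; params: mapping of module id to a dict
    # that may hold a pickled 'hyperparams' entry
    modules = {mod.id: mod for mod in workflow_modules}
    steps = []
    step_descriptions = []
    module_to_step = {}
    for mod in modules.values():
        self._add_step(steps, step_descriptions, modules, params,
                       module_to_step, mod)
    return steps, step_descriptions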