def search(dataset_root, problem, args):
    """Run a pipeline search for ``problem`` using the CLI arguments.

    A ``PipelineSearcher`` is created from ``args.input``, ``args.output``
    and ``args.static`` with dumping enabled and ``args.hard`` as the hard
    timeout; its ``search`` method is then invoked with the problem, the
    timeout, the budget and the template taken from ``args``.

    Args:
        dataset_root: Not used by this function; kept in the signature.
        problem: Problem specification forwarded to the searcher.
        args: Namespace-like object providing ``input``, ``output``,
            ``static``, ``hard``, ``timeout``, ``budget`` and ``template``.

    Returns:
        Whatever ``PipelineSearcher.search`` returns.
    """
    searcher = PipelineSearcher(
        args.input,
        args.output,
        args.static,
        dump=True,
        hard_timeout=args.hard,
    )
    outcome = searcher.search(
        problem,
        args.timeout,
        args.budget,
        args.template,
    )
    return outcome
def search(dataset_root, problem, args):
    """Run a pipeline search for ``problem`` using the CLI arguments.

    A ``PipelineSearcher`` is created from ``args.input`` and
    ``args.output`` with dumping enabled, and its ``search`` method is
    called with ``timeout`` and ``budget`` passed as keyword arguments.

    Args:
        dataset_root: Not used by this function; kept in the signature.
        problem: Problem specification forwarded to the searcher.
        args: Namespace-like object providing ``input``, ``output``,
            ``timeout`` and ``budget``.

    Returns:
        Whatever ``PipelineSearcher.search`` returns.
    """
    searcher = PipelineSearcher(args.input, args.output, dump=True)
    outcome = searcher.search(
        problem,
        timeout=args.timeout,
        budget=args.budget,
    )
    return outcome
def process_dataset(dataset_name, dataset, problem, args):
    """Search pipelines for one dataset and test-score the selected candidates.

    The function never raises: every failure is reported through the
    ``'error'`` key of the returned dict.

    Steps:
        1. Create ``<args.output>/<dataset_name>`` and run a
           ``PipelineSearcher`` search writing into it.
        2. If the search succeeds, pop the ``'summary'`` entry from its
           result, pick candidates with ``_select_candidates`` and score each
           one with ``score_pipeline``.
        3. Sort candidates by ``test_score`` (descending) and copy the first
           row's ``test_score``, ``template`` and ``score`` (as ``cv_score``)
           into the result.

    Args:
        dataset_name: Name used for the output subfolder, messages and the
            ``'dataset'`` entry of the result.
        dataset: Dataset object forwarded to the searcher and the scorer.
        problem: Problem object forwarded to the searcher and the scorer.
        args: Namespace-like object providing ``input``, ``output``,
            ``static``, ``hard``, ``ignore_errors``, ``folds``,
            ``subprocess_timeout``, ``max_errors``, ``timeout``, ``budget``
            and ``templates_csv``.

    Returns:
        dict: On search failure, only ``'dataset'`` and ``'error'``.
        Otherwise the search result (without ``'summary'``) plus
        ``'elapsed'`` and ``'dataset'``, and, when candidates exist,
        ``'test_score'``, ``'template'`` and ``'cv_score'``. If the testing
        stage fails, ``'error'`` is set to ``'TEST Error: ...'``.
    """
    box_print("Processing dataset {}".format(dataset_name), True)

    output_path = os.path.join(args.output, dataset_name)
    os.makedirs(output_path, exist_ok=True)

    LOGGER.info("Searching Pipeline for dataset %s", dataset_name)
    try:
        start_ts = datetime.utcnow()
        pps = PipelineSearcher(
            args.input,
            output_path,
            args.static,
            dump=True,
            hard_timeout=args.hard,
            ignore_errors=args.ignore_errors,
            cv_folds=args.folds,
            subprocess_timeout=args.subprocess_timeout,
            max_errors=args.max_errors,
            store_summary=True,
        )
        result = pps.search(dataset, problem, args.timeout, args.budget, args.templates_csv)
        result['elapsed'] = datetime.utcnow() - start_ts
        result['dataset'] = dataset_name

    except Exception as ex:
        # A failed search is reported through the result, not propagated.
        result = {
            'dataset': dataset_name,
            'error': '{}: {}'.format(type(ex).__name__, ex),
        }

    else:
        try:
            summary = result.pop('summary')
            candidates = _select_candidates(summary)
            if candidates.empty:
                box_print('No valid pipelines found for dataset {}'.format(
                    dataset_name))
            else:
                ranked_path = os.path.join(output_path, 'pipelines_ranked')
                test_scores = list()
                for index, candidate in candidates.iterrows():
                    try:
                        pipeline = candidate.pipeline
                        pipeline_path = os.path.join(ranked_path, pipeline)
                        test_score = score_pipeline(dataset, problem, pipeline_path,
                                                    args.static, output_path)
                        test_scores.append(test_score)
                    except Exception:
                        # Scoring stays best-effort: keep a None placeholder so
                        # the list stays aligned with the candidates, but log
                        # the failure instead of discarding it silently.
                        LOGGER.exception(
                            'Error scoring candidate %s for dataset %s',
                            index, dataset_name)
                        test_scores.append(None)

                candidates['test_score'] = test_scores
                candidates = candidates.sort_values('test_score', ascending=False)

                # After the descending sort the first row is the best candidate.
                best = candidates.iloc[0]
                result['test_score'] = best.test_score
                result['template'] = best.template
                result['cv_score'] = best.score
                box_print('Best pipelines for dataset {}:\n{}'.format(
                    dataset_name, candidates.to_string()))

        except Exception as ex:
            LOGGER.exception('Error while testing the winner pipeline')
            result['error'] = 'TEST Error: {}'.format(ex)

    return result