def get_layout_from_study(study_id):
    """
    params:
        study_id: id of the study to visualize
    outputs:
        the scatter-plot layout for the study's runs and the corresponding
        evaluations dataframe
    """
    items = study.get_study(int(study_id))
    # Only a slice of the study's runs is fetched here.
    run_ids = items.runs[1:10]
    item = evaluations.list_evaluations('predictive_accuracy',
                                        id=run_ids,
                                        output_format='dataframe',
                                        per_fold=False)
    layout = html.Div([
        dcc.Dropdown(
            id='dropdown-study',
            options=[
                {'label': 'mean-value', 'value': '0'},
                {'label': 'folded', 'value': '1'},
            ],
            value='0'),
        html.Div(id='scatterplot-study'),
    ], style={"fontFamily": font})
    return layout, item
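# A minimal sketch of how the layout above could be served, assuming the
# module-level imports used throughout this file (dash, dcc, html, study,
# evaluations). The helper name `_demo_study_app` is hypothetical, and the
# 'value'/'data_name' column names are an assumption about the dataframe
# returned by list_evaluations(output_format='dataframe').
def _demo_study_app(study_id):
    import dash
    import plotly.graph_objs as go
    from dash.dependencies import Input, Output

    app = dash.Dash(__name__)
    layout, df = get_layout_from_study(study_id)
    app.layout = layout

    @app.callback(Output('scatterplot-study', 'children'),
                  [Input('dropdown-study', 'value')])
    def _render(_dropdown_value):
        # One marker per run: the mean metric value against the dataset it
        # ran on. The dropdown value is ignored in this sketch because the
        # dataframe above was fetched with per_fold=False.
        figure = go.Figure(data=[go.Scatter(x=df['value'],
                                            y=df['data_name'],
                                            mode='markers')])
        return dcc.Graph(figure=figure)

    return app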
def generate_files(study_id, measure):
    # Fetch all evaluations of a specific study for the given measure.
    print("Fetching evaluation results from OpenML...")
    study = get_study(study_id)
    evaluations = list_all(list_evaluations, measure,
                           setup=study.setups, task=study.tasks)

    setup_flowid = {}
    task_data_id = {}
    setup_name = {}
    task_setup_result = {}
    task_qualities = {}
    tasks = set()
    setups = set()

    # Obtain the data and do the bookkeeping.
    for run_id in evaluations.keys():
        task_id = evaluations[run_id].task_id
        flow_id = evaluations[run_id].flow_id
        data_id = evaluations[run_id].data_id
        setup_id = evaluations[run_id].setup_id
        value = evaluations[run_id].value

        task_data_id[task_id] = data_id
        setup_flowid[setup_id] = flow_id
        tasks.add(task_id)
        setups.add(setup_id)
        if task_id not in task_setup_result:
            task_setup_result[task_id] = {}
        task_setup_result[task_id][setup_id] = value

    print("Fetching meta-features from OpenML...")
    # Obtain the meta-features; keep only qualities available for all tasks.
    complete_quality_set = None
    for task_id in tasks:
        try:
            qualities = openml.datasets.get_dataset(
                task_data_id[task_id]).qualities
            task_qualities[task_id] = qualities
            if complete_quality_set is None:
                complete_quality_set = qualities.keys()
            else:
                complete_quality_set = complete_quality_set & qualities.keys()
        except arff.BadDataFormat:
            print("Error parsing dataset: " + str(task_data_id[task_id]))
        except arff.BadAttributeType:
            print("Error parsing dataset: " + str(task_data_id[task_id]))
    complete_quality_set = list(complete_quality_set)

    print("Exporting evaluations...")
    for setup_id in setups:
        flow = openml.flows.get_flow(setup_flowid[setup_id])
        setup_name[setup_id] = "%s_%s" % (setup_id, flow.name)

    run_data = []
    for task_id in tasks:
        for setup_id in setups:
            if setup_id in task_setup_result[task_id]:
                perf = task_setup_result[task_id][setup_id]
                status = "ok"
            else:
                perf = 0
                status = "other"
            run_data.append([task_id, "1", setup_name[setup_id], perf, status])

    run_attributes = [["openml_task_id", "STRING"],
                      ["repetition", "NUMERIC"],
                      ["algorithm", "STRING"],
                      [measure, "NUMERIC"],
                      ["runstatus", ["ok", "timeout", "memout",
                                     "not_applicable", "crash", "other"]]]
    run_arff = {
        "attributes": run_attributes,
        "data": run_data,
        "relation": "RUN_EVALUATIONS"
    }
    with open("output/study_" + str(study_id) + "_run_evaluations_" +
              measure + ".arff", "w") as fp:
        arff.dump(run_arff, fp)

    print("Exporting meta-features...")
    qualities_attributes = [["openml_task_id", "STRING"],
                            ["repetition", "NUMERIC"]]
    for f in complete_quality_set:
        qualities_attributes.append([f, "NUMERIC"])

    qualities_data = []
    for task_id in tasks:
        current_line = [task_id, "1"]
        for quality in complete_quality_set:
            current_line.append(task_qualities[task_id][quality])
        qualities_data.append(current_line)

    qualities_arff = {
        "attributes": qualities_attributes,
        "data": qualities_data,
        "relation": "METAFEATURES"
    }
    with open("output/study_" + str(study_id) + "_metafeatures.arff",
              "w") as fp:
        arff.dump(qualities_arff, fp)

    print("Exporting joint table...")
    eval_labels = ['openml_task_id', 'repetition', 'algorithm', measure,
                   'runstatus']
    df_evals = pd.DataFrame.from_records(run_data, columns=eval_labels)
    quality_labels = ['openml_task_id', 'repetition']
    quality_labels.extend(complete_quality_set)
    df_qualities = pd.DataFrame.from_records(qualities_data,
                                             columns=quality_labels)
    joint_data = pd.merge(df_evals, df_qualities, how='left',
                          on=['openml_task_id', 'repetition'])

    joint_attributes = [["openml_task_id", "STRING"],
                        ["repetition", "NUMERIC"],
                        ["algorithm", "STRING"],
                        [measure, "NUMERIC"]]
    for f in complete_quality_set:
        joint_attributes.append([f, "NUMERIC"])
    joint_arff = {
        "attributes": joint_attributes,
        # liac-arff expects a list of rows here, not a DataFrame.
        "data": joint_data.values.tolist(),
        "relation": "JOINTMETADATA"
    }
    with open("output/study_" + str(study_id) + "_joint.arff", "w") as fp:
        arff.dump(joint_arff, fp)
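# A small usage sketch for generate_files, assuming `arff` is the liac-arff
# package and `pd` is pandas, as the writer code above implies. The helper
# name `_load_joint_table` is hypothetical; generate_files writes into
# "output/", so that directory is created first.
def _load_joint_table(study_id, measure="predictive_accuracy"):
    import os

    os.makedirs("output", exist_ok=True)
    generate_files(study_id, measure)
    # Read the exported joint table back into a dataframe.
    with open("output/study_" + str(study_id) + "_joint.arff") as fp:
        decoded = arff.load(fp)
    columns = [name for name, _ in decoded["attributes"]]
    return pd.DataFrame(decoded["data"], columns=columns)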
def get_layout_from_study(study_id):
    """Generate the layout for the study dashboard.

    Data content (graphs, tables) is generated through callbacks.

    study_id: id of the study to generate the dashboard for.
    returns: an html.Div element whose children contain all UI elements
        and the parent divs for data content.
    """
    # Results may be shown in aggregate (mean of folds) or per fold:
    graph_type_dropdown = dcc.Dropdown(
        id="graph-type-dropdown",
        options=[
            {"label": "Show scatter plot of results", "value": "scatter"},
            {"label": "Show parallel line plot of results",
             "value": "parallel"},
        ],
        value="scatter",
    )

    # We construct the metric dropdown menu dynamically from computed
    # metrics. Simply listing all metrics
    # (evaluations.list_evaluation_measures) might include metrics that are
    # not recorded for this study.
    this_study = study.get_study(int(study_id))
    first_run = runs.get_run(this_study.runs[0])

    # The full list of metrics contains 'prior' metrics, which do not depend
    # on the model but only on the given dataset. Some other entries do not
    # make sense as evaluation metrics either; there may be more that should
    # be excluded here.
    illegal_metrics = ["number_of_instances", "os_information"]
    metrics = [
        metric for metric in first_run.evaluations
        if metric not in illegal_metrics and "prior" not in metric
    ]
    if "predictive_accuracy" in metrics:
        default_metric = "predictive_accuracy"
    elif "root_mean_squared_error" in metrics:
        default_metric = "root_mean_squared_error"
    else:
        default_metric = metrics[0]

    metric_dropdown = dcc.Dropdown(
        id="metric-dropdown",
        options=[{
            "label": metric.replace("_", " ").title(),
            "value": metric
        } for metric in metrics],
        value=default_metric,
    )

    show_fold_checkbox = dcc.Checklist(
        id="show-fold-checkbox",
        options=[{
            "label": "Show results for each fold (can be slow)",
            "value": "fold"
        }],
        value=[],
    )

    dataset_table = create_dataset_overview_table(
        id_="dataset-table",
        dataset_ids=this_study.data,
        columns=[
            ("Name", "name"),
            ("Instances", "NumberOfInstances"),
            ("Features", "NumberOfFeatures"),
            # Should only be included for studies with classification tasks.
            ("Classes", "NumberOfClasses"),
            ("Missing Values", "NumberOfMissingValues"),
            ("Numeric Features", "NumberOfNumericFeatures"),
            ("Categorical Features", "NumberOfSymbolicFeatures"),
        ],
    )

    layout = html.Div([
        graph_type_dropdown,
        metric_dropdown,
        show_fold_checkbox,
        html.Div(id="graph-div"),
        dataset_table,
    ])
    return layout
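# The layout above only creates the controls and an empty 'graph-div'; a
# callback has to fill that div. The sketch below is an illustration only:
# `register_study_callbacks` is a hypothetical helper and its body is a
# placeholder, not the plotting code of the real dashboard.
def register_study_callbacks(app):
    from dash.dependencies import Input, Output

    @app.callback(
        Output("graph-div", "children"),
        [Input("graph-type-dropdown", "value"),
         Input("metric-dropdown", "value"),
         Input("show-fold-checkbox", "value")])
    def update_study_graph(graph_type, metric, show_fold):
        per_fold = "fold" in show_fold
        # Placeholder: the real callback would build a scatter or parallel
        # line plot of `metric` over the study's runs (per fold if asked).
        return html.Div("{} plot of {} (per fold: {})".format(
            graph_type, metric, per_fold))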