def _start_workflows_and_record_start_time(
        workflow_name: str,
        workflow_range: Tuple[int, int],
        workers: int = WORKERS_DEFAULT_COUNT) -> pd.DataFrame:
    logger.info(f"Starting {workflow_range} workflows...")
    df = create_empty_dataframe_for_started_results()
    with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
        futures = [
            executor.submit(_start_single_workflow,
                            build_extended_workflow_name(workflow_name, i))
            for i in range(workflow_range[0], workflow_range[1] + 1)
        ]
        for future in concurrent.futures.as_completed(futures):
            try:
                workflow_name, asked_to_start_datetime = future.result()
                df = df.append(
                    {
                        "name": workflow_name,
                        "asked_to_start_date": asked_to_start_datetime,
                    },
                    ignore_index=True,
                )
            except Exception as e:
                logger.error(e)
    return df

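# NOTE: ``create_empty_dataframe_for_started_results`` and
# ``build_extended_workflow_name`` are defined elsewhere in this module.
# The sketches below only illustrate what they are assumed to do, based on
# how they are used above (an empty frame with the "name" and
# "asked_to_start_date" columns, and a per-run name built from the prefix and
# run number); they are not the module's actual implementations.
def _sketch_create_empty_dataframe_for_started_results() -> pd.DataFrame:
    # columns match what _start_workflows_and_record_start_time appends
    return pd.DataFrame(columns=["name", "asked_to_start_date"])


def _sketch_build_extended_workflow_name(workflow: str, number: int) -> str:
    # assumed naming scheme: "<prefix>-<number>", e.g. "bench-7"
    return f"{workflow}-{number}"
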
def monitor(workflow: str, sleep: int) -> None:
    """Periodically collect the defined metrics and save them to a JSON file.

    This function is blocking.
    """
    _print_metrics()
    logger.info("Starting monitoring...")
    all_metrics = {}
    metrics_parameters = {
        "workflow": workflow,
    }
    try:
        while True:
            # if metrics take, for example, a couple of seconds to collect,
            # monitored_date will be less accurate
            monitored_date = get_utc_now_timestamp()
            collected_metrics = _collect_metrics(metrics_parameters)
            all_metrics[monitored_date] = collected_metrics
            _save_metrics(workflow, all_metrics)
            time.sleep(sleep)
    except KeyboardInterrupt:
        logger.info("Stopping monitoring...")
    finally:
        _save_metrics(workflow, all_metrics)

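# NOTE: ``_collect_metrics`` and ``_save_metrics`` are defined elsewhere in
# this module. The sketches below only illustrate the contract assumed by
# ``monitor``: each entry in METRICS is evaluated with the shared parameters,
# and the accumulated results (keyed by collection timestamp) are dumped to a
# JSON file. The ``collect`` method and the output file name are assumptions,
# not the actual implementation.
def _sketch_collect_metrics(parameters: Dict) -> Dict:
    # assumes every metric object in METRICS exposes ``name`` and a callable
    # ``collect`` that accepts the shared parameters
    return {m.name: m.collect(parameters) for m in METRICS}


def _sketch_save_metrics(workflow: str, all_metrics: Dict) -> None:
    import json

    # hypothetical file name; the real path helper may differ
    with open(f"{workflow}_metrics.json", "w") as f:
        json.dump(all_metrics, f, indent=2)
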
def _save_plots(plots: List[Tuple[str, Figure]], workflow: str,
                workflow_range: Tuple[int, int]) -> None:
    logger.info("Saving plots...")
    for base_name, figure in plots:
        path = Path(
            f"{workflow}_{base_name}_{workflow_range[0]}_{workflow_range[1]}.png"
        )
        figure.savefig(path)

def _merge_workflows_and_started_results(
        workflows: pd.DataFrame, started_results: pd.DataFrame) -> pd.DataFrame:
    """Merge workflow status results with the recorded started results.

    Required columns: name (workflow_name).
    """
    logger.info("Merging workflows and started results...")
    return workflows.merge(started_results, on=["name"], how="left")

def start(workflow_name: str, workflow_range: Tuple[int, int],
          workers: int) -> None:
    """Start already submitted workflows."""
    started_results = _start_workflows_and_record_start_time(
        workflow_name, workflow_range, workers)
    started_results = _append_to_existing_started_results(
        workflow_name, started_results)
    _save_started_results(workflow_name, started_results)
    logger.info("Finished starting workflows.")

def _append_to_existing_started_results(
        workflow_name: str, new_results: pd.DataFrame) -> pd.DataFrame:
    """Append new started results to existing started results and return them."""
    results_path = build_started_results_path(workflow_name)
    existing_results = pd.DataFrame()
    if results_path.exists():
        logger.info("Loading existing started results. Appending...")
        existing_results = pd.read_csv(results_path)
    return existing_results.append(new_results, ignore_index=True)

def _build_plots(df: pd.DataFrame,
                 plot_parameters: Dict) -> List[Tuple[str, Figure]]:
    logger.info("Building plots...")
    plots = []
    for build_plot in [
            _build_execution_progress_plot,
            _build_execution_status_plot,
            _build_total_time_histogram,
            _build_runtime_histogram,
            _build_pending_time_histogram,
    ]:
        plot_base_name, figure = build_plot(df, plot_parameters)
        plots.append((plot_base_name, figure))
    return plots

def _create_and_upload_workflows(
    workflow: str,
    workflow_range: Tuple[int, int],
    file: Optional[str] = None,
    workers: int = WORKERS_DEFAULT_COUNT,
) -> None:
    logger.info(f"Creating and uploading {workflow_range} workflows...")
    with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
        futures = [
            executor.submit(
                _create_and_upload_single_workflow,
                build_extended_workflow_name(workflow, i),
                file,
            )
            for i in range(workflow_range[0], workflow_range[1] + 1)
        ]
        for future in concurrent.futures.as_completed(futures):
            # collect results; if a worker raised an exception, it is re-raised here
            future.result()

def _clean_results(df: pd.DataFrame) -> pd.DataFrame:
    logger.info("Cleaning results...")

    # fix "-" values for created status
    df.loc[df["status"] == "created", "started"] = None
    df.loc[df["status"] == "created", "ended"] = None

    df["asked_to_start_date"] = df.apply(
        lambda row: None
        if pd.isna(row["asked_to_start_date"]) else row["asked_to_start_date"],
        axis=1,
    )

    # fix "-" values for running, pending, queued statuses
    df.loc[df["status"] == "running", "ended"] = None
    df.loc[df["status"] == "pending", "started"] = None
    df.loc[df["status"] == "pending", "ended"] = None
    df.loc[df["status"] == "queued", "started"] = None
    df.loc[df["status"] == "queued", "ended"] = None

    return df

def collect(workflow_prefix: str, force: bool) -> None:  # noqa: D103
    results_path = build_started_results_path(workflow_prefix)
    if results_path.exists():
        started_results = pd.read_csv(results_path)
    else:
        logger.warning("Started results are not found.")
        started_results = create_empty_dataframe_for_started_results()

    workflows = _get_workflows(workflow_prefix)

    if _workflows_finished(workflows) or force:
        results = _merge_workflows_and_started_results(workflows, started_results)
        results = _clean_results(results)

        collect_datetime = get_utc_now_timestamp()
        results["collected_date"] = [collect_datetime] * len(results)

        _save_collected_results(workflow_prefix, results)
        logger.info(f"Collected {len(results)} workflows. Finished.")
    else:
        logger.info(
            "Not collecting. Workflows are still running. Use -f option to force collect."
        )

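# NOTE: ``_workflows_finished`` is defined elsewhere in this module. The
# sketch below only shows the assumed check used by ``collect``: collection is
# allowed once every workflow in the status frame has reached a terminal
# state. The exact set of terminal statuses here is an assumption.
def _sketch_workflows_finished(workflows: pd.DataFrame) -> bool:
    terminal_statuses = {"finished", "failed", "stopped", "deleted"}
    return bool(workflows["status"].isin(terminal_statuses).all())
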
def _derive_metrics(df: pd.DataFrame) -> pd.DataFrame:
    logger.info("Deriving metrics...")

    df["workflow_number"] = df.apply(
        lambda row: _get_workflow_number_from_name(row["name"]), axis=1)

    def _calculate_difference(row: pd.Series, start_column: str,
                              end_column: str) -> Optional[int]:
        """Calculate the difference between two datetimes in string format."""
        start_date = row[start_column]
        end_date = row[end_column]

        start_date_exists = not pd.isna(start_date)
        end_date_exists = not pd.isna(end_date)

        if start_date_exists and end_date_exists:
            return _convert_str_date_to_epoch(
                end_date) - _convert_str_date_to_epoch(start_date)
        return None

    df["pending_time"] = df.apply(
        partial(
            _calculate_difference,
            start_column="asked_to_start_date",
            end_column="started",
        ),
        axis=1,
    )

    df["runtime"] = df.apply(
        partial(_calculate_difference, start_column="started", end_column="ended"),
        axis=1,
    )

    return df

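# NOTE: ``_convert_str_date_to_epoch`` and ``_get_workflow_number_from_name``
# are defined elsewhere in this module. The sketches below only illustrate the
# behaviour relied on by ``_derive_metrics``; the actual date format and
# naming scheme are assumptions.
def _sketch_convert_str_date_to_epoch(date_str: str) -> int:
    from datetime import datetime, timezone

    # assumes dates like "2023-01-01T12:00:00" interpreted as UTC
    dt = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S")
    return int(dt.replace(tzinfo=timezone.utc).timestamp())


def _sketch_get_workflow_number_from_name(name: str) -> int:
    # assumes the run number is the suffix after the last "-", e.g. "bench-7" -> 7
    return int(name.rsplit("-", 1)[1])
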
def submit(workflow_prefix: str, workflow_range: Tuple[int, int], file: str,
           workers: int) -> None:
    """Submit multiple workflows without starting them."""
    _create_and_upload_workflows(workflow_prefix, workflow_range, file, workers)
    logger.info("Finished creating and uploading workflows.")

def _save_started_results(workflow_name: str, df: pd.DataFrame) -> None:
    logger.info("Saving started results...")
    results_path = build_started_results_path(workflow_name)
    df.to_csv(results_path, index=False)

def _print_metrics() -> None:
    logger.info("Following metrics will be collected:")
    for m in METRICS:
        logger.info(f"- {m.name}")

def _save_collected_results(workflow: str, df: pd.DataFrame) -> None:
    logger.info("Saving collected results...")
    results_path = build_collected_results_path(workflow)
    df.to_csv(results_path, index=False)