import os
import time
import json
import pickle
from datetime import datetime
from ast import literal_eval

from sortedcontainers import SortedDict

# Project-local names are assumed to be defined elsewhere in the project:
# Prometheus, cp (the Ceph connector), get_df_from_json, predict_metrics,
# predict_metrics_fourier, plus url, token, metric_name, data_window and
# data_storage_path.

# Net data size to scrape from Prometheus
data_size = str(os.getenv('DATA_SIZE', '1h'))
train_schedule = int(os.getenv('TRAINING_REPEAT_HOURS', 6))
TRUE_LIST = ["True", "true", "1", "y"]
# Setting this to true will store intermediate dataframes to Ceph
store_intermediate_data = os.getenv("STORE_INTERMEDIATE_DATA", "False")

if str(os.getenv('GET_OLDER_DATA', "False")) in TRUE_LIST:
    print("Collecting previously stored data from {}".format(data_storage_path))
    # TODO: get_latest_df_dict needs error handling for the case where the
    # storage path does not exist
    data_dict = cp().get_latest_df_dict(data_storage_path)
else:
    data_dict = {}

config_list = []
# By default it will train for all label configurations.
# WARNING: that might take a lot of time depending on your metrics and CPU.
fixed_label_config = str(os.getenv("LABEL_CONFIG", None))
if fixed_label_config != "None":
    # Separate multiple label configurations using a ';' (semicolon)
    config_list = fixed_label_config.split(";")
    fixed_label_config_dict = literal_eval(config_list[0])  # TODO: add more error handling here
    print(fixed_label_config_dict)
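
# The TODO above notes that cp().get_latest_df_dict() has no error handling
# for a missing storage path. A minimal sketch of a guarded loader at the
# call site; the helper name is hypothetical and the broad except is an
# assumption until cp's actual failure mode is known:
def load_stored_data_or_empty(storage_path):
    """Return the latest stored DataFrame dict, or {} if loading fails."""
    try:
        return cp().get_latest_df_dict(storage_path)
    except Exception as exc:  # TODO: narrow to the real exception type
        print("Could not load stored data from {}: {}".format(storage_path, exc))
        return {}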
def job(current_time):
    # TODO: Replace this function with the model training function and set up
    # the correct IntervalTrigger time
    global data_dict, predictions_dict_prophet, predictions_dict_fourier, \
        current_metric_metadata, current_metric_metadata_dict, data_window, \
        url, token, chunk_size, data_size, TRUE_LIST, store_intermediate_data
    global data, config_list

    start_time = time.time()
    prom = Prometheus(url=url, token=token, data_chunk=chunk_size,
                      stored_data=data_size)
    metric = prom.get_metric(metric_name)
    print("metric collected.")

    # Convert the raw response to JSON
    metric = json.loads(metric)

    # Shape the metric JSON into a dictionary with all the sub-label
    # configurations as keys and their data as Pandas DataFrames
    data_dict = get_df_from_json(metric, data_dict, data_window)
    del metric, prom

    if str(store_intermediate_data) in TRUE_LIST:
        print("DataFrame stored at: ",
              cp().store_data(metric_name, pickle.dumps(data_dict),
                              data_storage_path +
                              str(datetime.now().strftime('%Y%m%d%H%M'))))

    if fixed_label_config != "None":  # a label config has been specified
        # Keep only the data for the requested label configurations
        single_label_data_dict = {}
        existing_config_list = list(data_dict.keys())
        for config in config_list:
            config_found = False
            for existing_config in existing_config_list:
                if SortedDict(literal_eval(existing_config)) == SortedDict(literal_eval(config)):
                    single_label_data_dict[existing_config] = data_dict[existing_config]
                    config_found = True
            if not config_found:
                print("Specified Label Configuration {} was not found".format(config))
                # TODO: consider raising a KeyError here

        current_metric_metadata = list(single_label_data_dict.keys())[0]
        current_metric_metadata_dict = literal_eval(current_metric_metadata)

        print(data_dict[current_metric_metadata].head(5))
        print(data_dict[current_metric_metadata].tail(5))
        print("Using the default label config")

        predictions_dict_prophet = predict_metrics(single_label_data_dict)
        predictions_dict_fourier = predict_metrics_fourier(single_label_data_dict)
    else:
        # Preview the first collected DataFrame, then train on all of them
        for x in data_dict:
            print(data_dict[x].head(5))
            print(data_dict[x].tail(5))
            break
        predictions_dict_prophet = predict_metrics(data_dict)
        predictions_dict_fourier = predict_metrics_fourier(data_dict)

    # TODO: trigger data pruning here

    function_run_time = time.time() - start_time
    print("Total time taken to train was: {} seconds.".format(function_run_time))
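
# The TODO in job() mentions setting up an IntervalTrigger. A minimal sketch
# of the wiring, assuming APScheduler (this file does not confirm which
# scheduler the project uses), repeating every TRAINING_REPEAT_HOURS:
def schedule_training(repeat_hours):
    """Run job() on a fixed interval; blocks the calling thread."""
    from apscheduler.schedulers.blocking import BlockingScheduler
    from apscheduler.triggers.interval import IntervalTrigger

    scheduler = BlockingScheduler()
    scheduler.add_job(lambda: job(datetime.now()),
                      trigger=IntervalTrigger(hours=repeat_hours))
    scheduler.start()

# Hypothetical usage: schedule_training(train_schedule)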
chunk_size = str(os.getenv('CHUNK_SIZE', '1d'))
# Net data size to scrape from Prometheus
data_size = str(os.getenv('DATA_SIZE', '1d'))
train_schedule = int(os.getenv('TRAINING_REPEAT_HOURS', 6))
TRUE_LIST = ["True", "true", "1", "y"]
# Setting this to true will store intermediate dataframes to Ceph
store_intermediate_data = os.getenv("STORE_INTERMEDIATE_DATA", "False")

if str(os.getenv('GET_OLDER_DATA', "False")) in TRUE_LIST:
    print("Collecting previously stored data.........")
    data_dict = cp().get_latest_df_dict(data_storage_path)
else:
    data_dict = {}

default_label_config = (
    "{'__name__': 'kubelet_docker_operations_latency_microseconds', "
    "'beta_kubernetes_io_arch': 'amd64', "
    "'beta_kubernetes_io_os': 'linux', "
    "'instance': 'cpt-0001.datahub.prod.upshift.rdu2.redhat.com', "
    "'job': 'kubernetes-nodes', "
    "'kubernetes_io_hostname': 'cpt-0001.datahub.prod.upshift.rdu2.redhat.com', "
    "'node_role_kubernetes_io_compute': 'true', "
    "'operation_type': 'create_container', "
    "'provider': 'rhos', "
    "'quantile': '0.5', "
    "'region': 'compute', "
    "'size': 'small'}")
fixed_label_config = str(os.getenv("LABEL_CONFIG", default_label_config))
fixed_label_config_dict = literal_eval(fixed_label_config)  # TODO: add more error handling here

predictions_dict_prophet = {}
predictions_dict_fourier = {}
current_metric_metadata = ""
current_metric_metadata_dict = {}
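
# Both TODOs asking for more error handling around literal_eval could be
# served by a guarded parser; a minimal sketch (the helper name is
# hypothetical, not part of the project):
def parse_label_config(raw_config):
    """Parse a label-config string into a dict, failing with a clear message."""
    try:
        parsed = literal_eval(raw_config)
    except (ValueError, SyntaxError) as exc:
        raise ValueError(
            "LABEL_CONFIG is not a valid Python literal: {!r}".format(raw_config)) from exc
    if not isinstance(parsed, dict):
        raise TypeError("LABEL_CONFIG must describe a dict of label name/value pairs")
    return parsed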