def evaluate(test_job, autoencoder, reg, lods, observed_traces,
             observed_traces_slices, out_data, test_aliases=None):
    """Score the trained regressor on one test job, with and without mapping.

    Two MAPE values are computed on the test rows of ``test_job``: one with
    the inputs translated through ``autoencoder.centroids`` ("no-opt") and
    one translated through ``autoencoder.altered_centroids`` ("map").
    Results are appended to ``out_data`` in place; nothing is returned.

    Args:
        test_job: Key of the job to evaluate. It indexes
            ``observed_traces_slices``, the test slices and the per-job
            lists inside ``out_data``.
        autoencoder: Trained encoder exposing ``centroids`` and
            ``altered_centroids``.
        reg: Trained regressor exposing ``clone()`` and ``MAPE(X, y)``.
        lods: Dataset holder; ``lods.test`` and ``lods.alias_to_id`` are
            read here.
        observed_traces: Dataset exposing the arrays ``a``, ``X`` and ``Y``.
        observed_traces_slices: Mapping from job key to the index of that
            job's rows inside ``observed_traces``.
        out_data: Result container; ``out_data['mappings'][test_job]`` and
            ``out_data['errs'][<kind>][test_job]`` must already be lists.
        test_aliases: Aliases forwarded to
            ``extract_encoding_and_map_to_nearest``. ``None`` (the default)
            is treated as an empty list.
    """
    # A fresh list per call: a literal ``[]`` default would be one object
    # shared by every invocation.
    if test_aliases is None:
        test_aliases = []

    # Trace(s) observed for this specific test job.
    idx = observed_traces_slices[test_job]
    trace = (observed_traces.a[idx], observed_traces.X[idx],
             observed_traces.Y[idx])

    # Encode the trace(s) with the trained autoencoder and record the proxy
    # the job is mapped to.
    # NOTE(review): these helpers presumably populate
    # ``autoencoder.altered_centroids`` -- confirm against their definition.
    extract_and_set_encoding(autoencoder, trace)
    proxy = extract_encoding_and_map_to_nearest(
        autoencoder, trace, lods.alias_to_id, test_aliases)
    out_data['mappings'][test_job].append(proxy)

    # Test rows of this job (alias column + X, without encodings).
    test = lods.test
    slices_test = test.slice_by_job_id(lods.alias_to_id)
    idxs_test = slices_test[test_job]
    X_test = np.hstack([test.a, test.X])[idxs_test, :]
    y_test = test.targets.ravel()[idxs_test]

    # Regressor inputs without mapping ...
    X_test_, y_test_ = translate_X_y(X_test, y_test, autoencoder.centroids)
    # ... and with mapping.
    X_test__, y_test__ = translate_X_y(X_test, y_test,
                                       autoencoder.altered_centroids)

    reg_ = reg.clone()
    err = reg_.MAPE(X_test_, y_test_)
    logging.info("[Test job: {}] \t Error no-opt: {:.2f}".format(
        test_job, err))

    # Start again from a fresh clone and check that it reproduces the
    # baseline error before scoring the mapped inputs.
    reg_ = reg.clone()
    assert np.abs(err - reg_.MAPE(X_test_, y_test_)) < 1e-10

    # Now use the data obtained from the mapping.
    err_mapping = reg_.MAPE(X_test__, y_test__)
    logging.info("[Test job: {}] \t Error map: {:.2f}".format(
        test_job, err_mapping))

    # err, err_mapping
    out_data['errs']['no-opt'][test_job].append(err)
    out_data['errs']['map'][test_job].append(err_mapping)
def evaluate(test_job, autoencoder, reg, lods, observed_traces,
             observed_traces_slices, out_data, test_aliases=None):
    """Score the regressor on one test job under four settings.

    The settings are: plain ("no-opt"), calibrated on the observed traces
    ("cal"), mapped ("map"), and mapped then calibrated ("map_then_cal").
    Each MAPE is appended to ``out_data`` in place; nothing is returned.

    Args:
        test_job: Key of the job to evaluate. It indexes
            ``observed_traces_slices``, the test slices and the per-job
            lists inside ``out_data``.
        autoencoder: Trained encoder exposing ``centroids`` and
            ``altered_centroids``.
        reg: Trained regressor exposing ``clone()``, ``calibrate(X, y)`` and
            ``MAPE(X, y)``.
        lods: Dataset holder; ``lods.config['COLS_KNOBS']``, ``lods.test``
            and ``lods.alias_to_id`` are read here.
        observed_traces: Dataset exposing ``a``, ``X``, ``Y`` and
            ``targets``.
        observed_traces_slices: Mapping from job key to the index of that
            job's rows inside ``observed_traces``.
        out_data: Result container; ``out_data['mappings'][test_job]`` and
            ``out_data['errs'][<kind>][test_job]`` must already be lists.
        test_aliases: Aliases forwarded to
            ``extract_encoding_and_map_to_nearest``. ``None`` (the default)
            is treated as an empty list.
    """
    # A fresh list per call: a literal ``[]`` default would be one object
    # shared by every invocation.
    if test_aliases is None:
        test_aliases = []

    n_knob_cols = len(lods.config['COLS_KNOBS'])

    # Trace(s) observed for this specific test job.
    idx = observed_traces_slices[test_job]
    trace = (observed_traces.a[idx], observed_traces.X[idx],
             observed_traces.Y[idx])

    # Encode the trace(s) with the trained autoencoder and record the proxy
    # the job is mapped to.
    # NOTE(review): these helpers presumably populate
    # ``autoencoder.altered_centroids`` -- confirm against their definition.
    extract_and_set_encoding(autoencoder, trace)
    proxy = extract_encoding_and_map_to_nearest(
        autoencoder, trace, lods.alias_to_id, test_aliases,
        within_template=False, metric='euclidean')
    out_data['mappings'][test_job].append(proxy)

    # Calibration rows: the observed traces of this job (alias column + X).
    X_calib = np.hstack([observed_traces.a[idx], observed_traces.X[idx]])
    y_calib = observed_traces.targets.ravel()[idx]

    # Calibration data for the regressor, 'without mapping' case ...
    X_calib_, y_calib_ = translate_X_y(X_calib, y_calib,
                                       autoencoder.centroids, n_knob_cols)
    # ... and 'with mapping' case.
    X_calib__, y_calib__ = translate_X_y(X_calib, y_calib,
                                         autoencoder.altered_centroids,
                                         n_knob_cols)

    # Test rows of this job (alias column + X, without encodings).
    test = lods.test
    slices_test = test.slice_by_job_id(lods.alias_to_id)
    idxs_test = slices_test[test_job]
    X_test = np.hstack([test.a, test.X])[idxs_test, :]
    y_test = test.targets.ravel()[idxs_test]

    # Regressor inputs without mapping ...
    X_test_, y_test_ = translate_X_y(X_test, y_test,
                                     autoencoder.centroids, n_knob_cols)
    # ... and with mapping.
    X_test__, y_test__ = translate_X_y(X_test, y_test,
                                       autoencoder.altered_centroids,
                                       n_knob_cols)

    # Baseline, then calibration on top of the baseline.
    reg_ = reg.clone()
    err = reg_.MAPE(X_test_, y_test_)
    logging.info("[Test job: {}] \t Error no-opt: {:.2f}".format(
        test_job, err))

    reg_.calibrate(X_calib_, y_calib_)
    err_cal = reg_.MAPE(X_test_, y_test_)
    logging.info("[Test job: {}] \t Error cal: {:.2f}".format(
        test_job, err_cal))

    # Discard the calibrated clone; a fresh clone must reproduce the
    # uncalibrated baseline error.
    reg_ = reg.clone()
    assert np.abs(err - reg_.MAPE(X_test_, y_test_)) < 1e-10

    # Now use the data obtained from the mapping.
    err_mapping = reg_.MAPE(X_test__, y_test__)
    logging.info("[Test job: {}] \t Error map: {:.2f}".format(
        test_job, err_mapping))

    reg_.calibrate(X_calib__, y_calib__)
    err_mapping_cal = reg_.MAPE(X_test__, y_test__)
    logging.info("[Test job: {}] \t Error map_and_cal: {:.2f}".format(
        test_job, err_mapping_cal))

    # err, err_cal, err_mapping, err_mapping_cal
    out_data['errs']['no-opt'][test_job].append(err)
    out_data['errs']['cal'][test_job].append(err_cal)
    out_data['errs']['map'][test_job].append(err_mapping)
    out_data['errs']['map_then_cal'][test_job].append(err_mapping_cal)
def train_model_and_evaluate(lods, out_data, seed=10):
    """Train a FancyAutoEncoder and an NN regressor, then evaluate per job.

    The autoencoder is fitted on the (optionally sub-sampled) training data,
    its centroids are used to translate the regressor inputs, the regressor
    is fitted, and ``evaluate`` is run for every job found in the observed
    traces. Results are accumulated in ``out_data`` and persisted.

    Args:
        lods: Dataset holder; reads ``config['COLS_KNOBS']``, ``trainval``,
            ``shared_trainval``, ``traincomplement`` /
            ``shared_traincomplement``, ``test`` and ``alias_to_id``.
        out_data: Result container; ``training_errs``, ``autoencoders`` and
            ``regressors`` must already be lists (other keys are filled by
            ``evaluate``).
        seed: Seed applied to NumPy, TensorFlow, the autoencoder and the
            regressor.

    Raises:
        ValueError: If sub-sampling left no training data at all.
    """
    np.random.seed(seed)
    tf.compat.v1.set_random_seed(seed)

    n_knob_cols = len(lods.config['COLS_KNOBS'])
    nn_params = HYPER_PARAMS['nn_params']
    # NOTE: ae_params is the dict stored in HYPER_PARAMS, so the two writes
    # below are visible through HYPER_PARAMS as well.
    ae_params = HYPER_PARAMS['ae_params']
    ae_params['knob_cols'] = lods.config['COLS_KNOBS']
    ae_params['random_state'] = seed

    # Optionally sub-sample the training sets (-1 means "use everything").
    tmp_trainval = lods.trainval
    tmp_shared_trainval = lods.shared_trainval
    if N_TRAIN_PER_JOB != -1:
        tmp_trainval = lods.trainval.get_x(N_TRAIN_PER_JOB)
    if N_SHARED_TRAIN_PER_JOB != -1:
        tmp_shared_trainval = lods.shared_trainval.get_x(
            N_SHARED_TRAIN_PER_JOB)

    if tmp_trainval is not None:
        logging.info("shape of remaining trainval (X): {}".format(
            tmp_trainval.X.shape))
    else:
        logging.info("tmp_trainval is None (perhaps because of get_x(0))")

    if tmp_shared_trainval is not None:
        logging.info("shape of remaining shared trainval (X): {}".format(
            tmp_shared_trainval.X.shape))
    else:
        logging.info(
            "tmp_shared_trainval is None (perhaps because of get_x(0))")

    if tmp_trainval is None:
        # in case we're invoking dataset.get_x(0)
        if tmp_shared_trainval is None:
            # Fail with a clear message instead of an AttributeError on None
            # a few lines further down.
            raise ValueError(
                "no training data: both trainval and shared_trainval "
                "are None")
        ds_train = tmp_shared_trainval
    else:
        ds_train = tmp_trainval + tmp_shared_trainval

    X_train = np.hstack([ds_train.a, ds_train.X, ds_train.Y])
    y_train = ds_train.targets.ravel()

    # Make autoencoder and fit on loaded data
    autoencoder = FancyAutoEncoder.build(**ae_params)
    logging.info("Fitting autoencoder on data of shape: {}".format(
        X_train.shape))
    if ENCODING_STRATEGY == 'shared':
        shared_train = lods.shared_trainval.get_x(N_OBS)
        X_shared_train = np.hstack(
            [shared_train.a, shared_train.X, shared_train.Y])
        autoencoder.fit(X_train, centroids_strategy='shared',
                        X_shared=X_shared_train, log_time=True)
    else:
        autoencoder.fit(X_train, log_time=True)

    # Get centroids of encodings for different workloads
    centroids = autoencoder.centroids

    # Adjust the X vector by transforming Y into job's centroid
    X, y = translate_X_y(X_train, y_train, centroids, n_knob_cols)

    # Make and fit a NN Regressor
    logging.info("Fitting regressor on data of shapes: {}, {}".format(
        X.shape, y.shape))
    reg = NNregressor(with_calibration=True, v1_compat_mode=True,
                      **nn_params, random_state=seed)
    reg.fit(X, y, log_time=True)
    training_mape = reg.MAPE(X, y)
    logging.info("Training Error: {:.2f}%".format(training_mape))
    out_data['training_errs'].append(training_mape)

    # Observed traces come from the complement of the training data.
    if ENCODING_STRATEGY == 'shared':
        observed_traces = lods.shared_traincomplement.get_x(N_OBS)
    else:
        observed_traces = lods.traincomplement.get_x(N_OBS)

    logging.info("observed_traces description: ")
    observed_traces.describe()

    observed_traces_slices = observed_traces.slice_by_job_id(
        alias_to_id=lods.alias_to_id)

    test_aliases = sorted(set(lods.test.a.ravel()))

    for test_job in observed_traces_slices:
        evaluate(test_job, autoencoder, reg, lods, observed_traces,
                 observed_traces_slices, out_data, test_aliases)

    # Append trained autoencoder information (with centroids) to output_data
    out_data['autoencoders'].append(autoencoder.get_persist_info())

    # Append trained regressor information to output_data
    out_data['regressors'].append(reg.get_persist_info())

    persist_data(copyDict(out_data), DATA_FNAME)
def train_model_and_evaluate(lods, out_data, seed=10):
    """Fit a KernelPCA encoder and an NN regressor, then evaluate per job.

    KernelPCA is fitted on the ``Y`` block of the (optionally sub-sampled)
    training data, per-job centroids of the encodings are attached to the
    PCA object, the regressor is fitted on inputs translated through those
    centroids, and ``evaluate`` is run for every job found in the observed
    traces. Results are accumulated in ``out_data`` and persisted.

    Args:
        lods: Dataset holder; reads ``config['COLS_KNOBS']``, ``trainval``,
            ``shared_trainval``, ``traincomplement`` /
            ``shared_traincomplement``, ``test`` and ``alias_to_id``.
        out_data: Result container; ``training_errs`` and ``regressors``
            must already be lists (other keys are filled by ``evaluate``).
        seed: Seed applied to NumPy, TensorFlow and the regressor.

    Raises:
        ValueError: If sub-sampling left no training data at all.
    """
    np.random.seed(seed)
    tf.compat.v1.set_random_seed(seed)

    n_knob_cols = len(lods.config['COLS_KNOBS'])
    nn_params = HYPER_PARAMS['nn_params']
    kpca_params = HYPER_PARAMS['kpca_params']

    # Optionally sub-sample the training sets (-1 means "use everything").
    tmp_trainval = lods.trainval
    tmp_shared_trainval = lods.shared_trainval
    if N_TRAIN_PER_JOB != -1:
        tmp_trainval = lods.trainval.get_x(N_TRAIN_PER_JOB)
    if N_SHARED_TRAIN_PER_JOB != -1:
        tmp_shared_trainval = lods.shared_trainval.get_x(
            N_SHARED_TRAIN_PER_JOB)

    if tmp_trainval is not None:
        logging.info("shape of remaining trainval (X): {}".format(
            tmp_trainval.X.shape))
    else:
        logging.info("tmp_trainval is None (perhaps because of get_x(0))")

    if tmp_shared_trainval is not None:
        logging.info("shape of remaining shared trainval (X): {}".format(
            tmp_shared_trainval.X.shape))
    else:
        logging.info(
            "tmp_shared_trainval is None (perhaps because of get_x(0))")

    if tmp_trainval is None:
        # in case we're invoking dataset.get_x(0)
        if tmp_shared_trainval is None:
            # Fail with a clear message instead of an AttributeError on None
            # a few lines further down.
            raise ValueError(
                "no training data: both trainval and shared_trainval "
                "are None")
        ds_train = tmp_shared_trainval
    else:
        ds_train = tmp_trainval + tmp_shared_trainval

    X_train = np.hstack([ds_train.a, ds_train.X, ds_train.Y])
    y_train = ds_train.targets.ravel()

    # Make PCA and fit on loaded data. Only the Y block is fed to KPCA, so
    # that is the shape reported in the log.
    fit_t = time.time()
    pca = KernelPCA(**kpca_params)
    pca.altered_centroids = None
    logging.info("Fitting KPCA on data of shape: {}".format(ds_train.Y.shape))
    if ENCODING_STRATEGY == 'shared':
        shared_train = lods.shared_trainval.get_x(N_OBS)
        pca.fit(ds_train.Y)
        encods_shared = pca.transform(shared_train.Y)
        centroids = compute_centroids(encods_shared, shared_train.a)
    else:
        encods = pca.fit_transform(ds_train.Y)
        centroids = compute_centroids(encods, ds_train.a)
    # Attach the centroids to the PCA object so it can stand in for the
    # autoencoder argument of ``evaluate``.
    pca.centroids = centroids

    # Split the elapsed time into whole minutes and the remaining seconds.
    fit_t = time.time() - fit_t
    minutes, seconds = divmod(fit_t, 60)
    logging.info("KPCA fitting time is: {} minutes and {} seconds".format(
        int(minutes), int(seconds)))

    # Adjust the X vector by transforming Y into job's centroid
    X, y = translate_X_y(X_train, y_train, pca.centroids, n_knob_cols)

    # Make and fit a NN Regressor
    logging.info("Fitting regressor on data of shapes: {}, {}".format(
        X.shape, y.shape))
    reg = NNregressor(with_calibration=True, **nn_params,
                      v1_compat_mode=True, random_state=seed)
    reg.fit(X, y, log_time=True)
    training_mape = reg.MAPE(X, y)
    logging.info("Training Error: {:.2f}%".format(training_mape))
    out_data['training_errs'].append(training_mape)

    # Observed traces come from the complement of the training data.
    if ENCODING_STRATEGY == 'shared':
        observed_traces = lods.shared_traincomplement.get_x(N_OBS)
    else:
        observed_traces = lods.traincomplement.get_x(N_OBS)

    logging.info("observed_traces description: ")
    observed_traces.describe()

    observed_traces_slices = observed_traces.slice_by_job_id(
        alias_to_id=lods.alias_to_id)

    test_aliases = sorted(set(lods.test.a.ravel()))

    for test_job in observed_traces_slices:
        evaluate(test_job, pca, reg, lods, observed_traces,
                 observed_traces_slices, out_data, test_aliases)

    out_data['regressors'].append(reg.get_persist_info())

    persist_data(copyDict(out_data), DATA_FNAME)
def train_model_and_evaluate(lods, triplet_idxs, out_data, seed=10, tf=None,
                             TripletPlusPlus=None, NNregressor=None):
    """Train a TripletPlusPlus encoder and an NN regressor, then evaluate.

    Steps: build and fit the encoder on triplet indices, compute workload
    centroids, fit the regressor on inputs translated through those
    centroids, run ``evaluate`` for every job in the observed traces, and
    persist ``out_data``.

    Args:
        lods: Dataset holder; reads ``trainval``, ``shared_trainval``,
            ``traincomplement`` / ``shared_traincomplement``, ``test`` and
            ``alias_to_id``.
        triplet_idxs: Triplet indices handed to ``autoencoder.fit_idxs``.
        out_data: Result container; ``training_errs``, ``autoencoders`` and
            ``regressors`` must already be lists (other keys are filled by
            ``evaluate``).
        seed: Seed applied to NumPy, TensorFlow and the regressor.
        tf: TensorFlow module, injected by the caller.
        TripletPlusPlus: Encoder class, injected by the caller.
        NNregressor: Regressor class, injected by the caller.

    NOTE(review): ``tf``, ``TripletPlusPlus`` and ``NNregressor`` default to
    ``None`` but are used unconditionally, so callers have to supply them.
    """
    np.random.seed(seed)
    tf.compat.v1.set_random_seed(seed)

    n_knob_cols = lods.trainval.X.shape[1]

    # 2. train the autoencoder on the triplets
    # Work on a shallow copy: the entries removed/added below must not leak
    # into the shared HYPER_PARAMS, otherwise a second call fails with
    # KeyError on '_nh'.
    autoencoder_params = dict(HYPER_PARAMS['encoder_params'])
    n_hidden = autoencoder_params.pop('_nh')
    n_hidden_layers = autoencoder_params.pop('_nhlayers')
    layer_sizes = [n_hidden] * n_hidden_layers + [
        ENCODING_SIZE + n_knob_cols
    ]

    # relu on every layer except the last one, which has no activation
    autoencoder_params['layer_sizes'] = layer_sizes
    autoencoder_params['activations'] = ['relu'] * (
        len(layer_sizes) - 1) + [None]
    # 561 metrics for streaming
    autoencoder_params['input_dim'] = lods.trainval.Y.shape[1]
    # n_knobs
    autoencoder_params['config_vec_size'] = n_knob_cols

    autoencoder = TripletPlusPlus(v1_compat_mode=True, **autoencoder_params)
    autoencoder.compile()
    autoencoder.centroids = None
    autoencoder.altered_centroids = None
    autoencoder.fit_idxs(triplet_idxs, fetch_triplets, lods, log_time=True)

    # 3. extract encodings for training workloads
    # NOTE(review): presumably this sets ``autoencoder.centroids``, which is
    # read right below -- confirm against compute_centroids.
    compute_centroids(autoencoder, lods, scheme=ENCODING_SCHEME)

    # 4. fetch training data for the regressor
    ds_train = lods.trainval + lods.shared_trainval
    X_ = np.hstack([ds_train.a, ds_train.X])
    y_ = ds_train.targets.ravel()
    X, y = translate_X_y(X_, y_, autoencoder.centroids)

    # 5. Train a regressor on the training data
    nn_params = HYPER_PARAMS['nn_params']
    logging.info("Fitting regressor on data of shapes: {}, {}".format(
        X.shape, y.shape))
    reg = NNregressor(**nn_params, random_state=seed)
    reg.fit(X, y, log_time=True)
    training_mape = reg.MAPE(X, y)

    # 6. calculate the training error
    logging.info("Training Error: {:.2f}%".format(training_mape))
    out_data['training_errs'].append(training_mape)

    # 7. get observed traces and evaluate on test jobs...
    if ENCODING_SCHEME == 'shared':
        observed_traces = lods.shared_traincomplement.get_x(N_OBS)
    else:
        observed_traces = lods.traincomplement.get_x(N_OBS)

    logging.info("observed_traces description: ")
    observed_traces.describe()

    observed_traces_slices = observed_traces.slice_by_job_id(
        alias_to_id=lods.alias_to_id)

    test_aliases = sorted(set(lods.test.a.ravel()))

    for test_job in observed_traces_slices:
        evaluate(test_job, autoencoder, reg, lods, observed_traces,
                 observed_traces_slices, out_data, test_aliases)

    # Append trained autoencoder information (with centroids) to output_data
    out_data['autoencoders'].append(autoencoder.get_persist_info())

    # Append trained regressor information to output_data
    out_data['regressors'].append(reg.get_persist_info())

    persist_data(copyDict(out_data), DATA_FNAME)
def train_model_and_evaluate(lods, out_data, seed=10):
    """Run one SNNAE + NN-regressor experiment and persist its results.

    The SNNAE encoder is fitted on the combined training data, centroids are
    computed, the regressor is fitted on inputs translated through those
    centroids, and ``evaluate`` is called for each job in the observed
    traces. ``out_data`` is updated in place and then persisted.

    Args:
        lods: Dataset holder; reads ``trainval``, ``shared_trainval``,
            ``traincomplement`` / ``shared_traincomplement``, ``test`` and
            ``alias_to_id``.
        out_data: Result container; ``training_errs``, ``autoencoders`` and
            ``regressors`` must already be lists (other keys are filled by
            ``evaluate``).
        seed: Seed applied to NumPy, TensorFlow and the regressor.
    """
    np.random.seed(seed)
    tf.compat.v1.set_random_seed(seed)

    train_set = lods.trainval + lods.shared_trainval

    # -- encoder ----------------------------------------------------------
    # The input width is taken from the data (561 metrics per the original
    # note) and written into the shared parameter dict.
    encoder_kwargs = HYPER_PARAMS['ae_params']
    encoder_kwargs['input_dim'] = train_set.Y.shape[1]
    snnae = SNNAE.build(**encoder_kwargs)
    snnae.compile()
    snnae.fit(train_set.Y, train_set.a, log_time=True)

    # -- centroids of the training workloads --------------------------------
    # NOTE(review): presumably this sets ``snnae.centroids``, read below --
    # confirm against compute_centroids.
    compute_centroids(snnae, lods, scheme=ENCODING_SCHEME)

    # -- regressor training data ------------------------------------------
    raw_inputs = np.hstack([train_set.a, train_set.X])
    raw_targets = train_set.targets.ravel()
    X, y = translate_X_y(raw_inputs, raw_targets, snnae.centroids)

    # -- regressor --------------------------------------------------------
    regressor_kwargs = HYPER_PARAMS['nn_params']
    shapes_msg = "Fitting regressor on data of shapes: {}, {}".format(
        X.shape, y.shape)
    logging.info(shapes_msg)
    reg = NNregressor(**regressor_kwargs,
                      v1_compat_mode=True,
                      keras_2=True,
                      random_state=seed)
    reg.fit(X, y, log_time=True)

    # -- training error ---------------------------------------------------
    train_err = reg.MAPE(X, y)
    logging.info("Training Error: {:.2f}%".format(train_err))
    out_data['training_errs'].append(train_err)

    # -- observed traces and per-job evaluation ---------------------------
    complement = (lods.shared_traincomplement
                  if ENCODING_SCHEME == 'shared'
                  else lods.traincomplement)
    observed = complement.get_x(N_OBS)
    logging.info("observed_traces description: ")
    observed.describe()

    job_slices = observed.slice_by_job_id(alias_to_id=lods.alias_to_id)
    aliases_in_test = sorted(set(lods.test.a.ravel()))
    for job in job_slices:
        evaluate(job, snnae, reg, lods, observed, job_slices, out_data,
                 aliases_in_test)

    # -- bookkeeping: encoder info (with centroids), regressor info ---------
    out_data['autoencoders'].append(snnae.get_persist_info())
    out_data['regressors'].append(reg.get_persist_info())
    persist_data(copyDict(out_data), DATA_FNAME)
def train_model_and_evaluate(lods, triplets, out_data, seed=10):
    """Run one triplet-autoencoder + NN-regressor experiment and persist it.

    A ``TAutoEncoder`` is fitted on the anchor/positive/negative arrays in
    ``triplets``, centroids are computed, the regressor is fitted on inputs
    translated through those centroids, and ``evaluate`` is called for each
    job in the observed traces. ``out_data`` is updated in place and then
    persisted.

    Args:
        lods: Dataset holder; reads ``trainval``, ``shared_trainval``,
            ``traincomplement`` / ``shared_traincomplement``, ``test`` and
            ``alias_to_id``.
        triplets: Mapping with the keys ``'Ya'``, ``'Yp'`` and ``'Yn'``.
        out_data: Result container; ``training_errs``, ``autoencoders`` and
            ``regressors`` must already be lists (other keys are filled by
            ``evaluate``).
        seed: Seed applied to NumPy, TensorFlow and the regressor.
    """
    np.random.seed(seed)
    tf.compat.v1.set_random_seed(seed)

    # FIXME (carried over from the original): the signature was adjusted to
    # take the triplets directly.
    anchors = triplets['Ya']
    positives = triplets['Yp']
    negatives = triplets['Yn']

    # -- encoder ----------------------------------------------------------
    # The input width is taken from the anchors (561 metrics per the
    # original note) and written into the shared parameter dict.
    encoder_kwargs = HYPER_PARAMS['autoencoder_params']
    encoder_kwargs['input_dim'] = anchors.shape[1]
    autoencoder = TAutoEncoder(v1_compat_mode=True, **encoder_kwargs)
    autoencoder.compile()
    autoencoder.centroids = None
    autoencoder.altered_centroids = None
    autoencoder.fit(anchors, positives, negatives, log_time=True)

    # -- centroids of the training workloads --------------------------------
    # NOTE(review): presumably this fills ``autoencoder.centroids``, read
    # below -- confirm against compute_centroids.
    compute_centroids(autoencoder, lods, scheme=ENCODING_SCHEME)

    # -- regressor training data ------------------------------------------
    train_set = lods.trainval + lods.shared_trainval
    raw_inputs = np.hstack([train_set.a, train_set.X])
    raw_targets = train_set.targets.ravel()
    X, y = translate_X_y(raw_inputs, raw_targets, autoencoder.centroids)

    # -- regressor --------------------------------------------------------
    regressor_kwargs = HYPER_PARAMS['nn_params']
    shapes_msg = "Fitting regressor on data of shapes: {}, {}".format(
        X.shape, y.shape)
    logging.info(shapes_msg)
    reg = NNregressor(**regressor_kwargs, random_state=seed)
    reg.fit(X, y, log_time=True)

    # -- training error ---------------------------------------------------
    train_err = reg.MAPE(X, y)
    logging.info("Training Error: {:.2f}%".format(train_err))
    out_data['training_errs'].append(train_err)

    # -- observed traces and per-job evaluation ---------------------------
    complement = (lods.shared_traincomplement
                  if ENCODING_SCHEME == 'shared'
                  else lods.traincomplement)
    observed = complement.get_x(N_OBS)
    logging.info("observed_traces description: ")
    observed.describe()

    job_slices = observed.slice_by_job_id(alias_to_id=lods.alias_to_id)
    aliases_in_test = sorted(set(lods.test.a.ravel()))
    for job in job_slices:
        evaluate(job, autoencoder, reg, lods, observed, job_slices,
                 out_data, aliases_in_test)

    # -- bookkeeping: encoder info (with centroids), regressor info ---------
    out_data['autoencoders'].append(autoencoder.get_persist_info())
    out_data['regressors'].append(reg.get_persist_info())
    persist_data(copyDict(out_data), DATA_FNAME)