# Imports assumed from the OmniAnomaly code base (TensorFlow 1.x, the
# `tfsnippet` library, and the `omni_anomaly` package); `config`, `results`,
# and `load_dataset` are expected to be provided elsewhere in the script.
import logging
import os
import pickle
import time
from pprint import pprint

import numpy as np
import tensorflow as tf
from tfsnippet.scaffold import VariableSaver
from tfsnippet.utils import get_variables_as_dict

from omni_anomaly.eval_methods import bf_search, pot_eval
from omni_anomaly.model import OmniAnomaly
from omni_anomaly.prediction import Predictor
from omni_anomaly.training import Trainer
from omni_anomaly.utils import get_data, get_data_dim, save_z


# Variant 1: entry point for a single (dataset, subdataset) pair.
def main(dataset, subdataset):
    logging.basicConfig(
        level="INFO",
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")

    # prepare the data
    # (x_train, _), (x_test, y_test) = get_data(
    #     config.dataset,
    #     config.max_train_size,
    #     config.max_test_size,
    #     train_start=config.train_start,
    #     test_start=config.test_start,
    # )
    (x_train, _), (x_test, y_test) = load_dataset(dataset, subdataset)

    tf.reset_default_graph()
    # construct the model under `variable_scope` named 'model'
    with tf.variable_scope("model") as model_vs:
        model = OmniAnomaly(config=config, name="model")

        # construct the trainer
        trainer = Trainer(
            model=model,
            model_vs=model_vs,
            max_epoch=config.max_epoch,
            batch_size=config.batch_size,
            valid_batch_size=config.test_batch_size,
            initial_lr=config.initial_lr,
            lr_anneal_epochs=config.lr_anneal_epoch_freq,
            lr_anneal_factor=config.lr_anneal_factor,
            grad_clip_norm=config.gradient_clip_norm,
            valid_step_freq=config.valid_step_freq,
        )

        # construct the predictor
        predictor = Predictor(
            model,
            batch_size=config.batch_size,
            n_z=config.test_n_z,
            last_point_only=True,
        )

        with tf.Session().as_default():
            if config.restore_dir is not None:
                # Restore variables from `save_dir`.
                saver = VariableSaver(
                    get_variables_as_dict(model_vs), config.restore_dir)
                saver.restore()

            if config.max_epoch > 0:
                # train the model
                train_start = time.time()
                best_valid_metrics = trainer.fit(x_train)
                train_time = time.time() - train_start
                # best_valid_metrics.update({"train_time": train_time})
            else:
                best_valid_metrics = {}

            # get score of train set for POT algorithm
            train_score, train_z, train_pred_speed = predictor.get_score(
                x_train)
            if config.train_score_filename is not None:
                with open(
                        os.path.join(config.result_dir,
                                     config.train_score_filename),
                        "wb") as file:
                    pickle.dump(train_score, file)
            if config.save_z:
                save_z(train_z, "train_z")

            if x_test is not None:
                # get score of test set
                test_start = time.time()
                test_score, test_z, pred_speed = predictor.get_score(x_test)
                test_time = time.time() - test_start
                if config.save_z:
                    save_z(test_z, "test_z")
                best_valid_metrics.update({
                    "pred_time": pred_speed,
                    "pred_total_time": test_time,
                })
                if config.test_score_filename is not None:
                    with open(
                            os.path.join(config.result_dir,
                                         config.test_score_filename),
                            "wb") as file:
                        pickle.dump(test_score, file)

                if y_test is not None and len(y_test) >= len(test_score):
                    if config.get_score_on_dim:
                        # get the joint score
                        test_score = np.sum(test_score, axis=-1)
                        train_score = np.sum(train_score, axis=-1)

                    # get best f1
                    t, th = bf_search(
                        test_score,
                        y_test[-len(test_score):],
                        start=config.bf_search_min,
                        end=config.bf_search_max,
                        step_num=int(
                            abs(config.bf_search_max - config.bf_search_min)
                            / config.bf_search_step_size),
                        display_freq=50,
                    )
                    # get pot results
                    pot_result = pot_eval(
                        train_score,
                        test_score,
                        y_test[-len(test_score):],
                        level=config.level,
                    )

                    # output the results
                    best_valid_metrics.update({
                        "best-f1": t[0],
                        "precision": t[1],
                        "recall": t[2],
                        "TP": t[3],
                        "TN": t[4],
                        "FP": t[5],
                        "FN": t[6],
                        "latency": t[-1],
                        "threshold": th,
                        "test_score": test_score,
                        "labels": y_test[-len(test_score):],
                    })
                    best_valid_metrics.update(pot_result)
                    results.update_metrics(best_valid_metrics)

            if config.save_dir is not None:
                # save the variables
                var_dict = get_variables_as_dict(model_vs)
                saver = VariableSaver(var_dict, config.save_dir)
                saver.save()

    print("=" * 30 + "result" + "=" * 30)
    pprint(best_valid_metrics)
    return best_valid_metrics
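
# A minimal driver sketch (an assumption, not part of the original code),
# assuming this variant lives in its own script: run `main` over several
# sub-datasets and collect the returned metrics. `run_all_subdatasets` is a
# hypothetical helper name, and the names accepted by `load_dataset` are
# unknown here.
def run_all_subdatasets(dataset, subdatasets):
    """Run `main` once per sub-dataset; return {subdataset: metrics}."""
    return {sub: main(dataset, sub) for sub in subdatasets}
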
# Variant 2: entry point driven entirely by the global `config` object.
def main():
    logging.basicConfig(
        level='INFO',
        format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')

    # prepare the data
    (x_train, _), (x_test, y_test) = \
        get_data(config.dataset, config.max_train_size, config.max_test_size,
                 train_start=config.train_start, test_start=config.test_start)

    # construct the model under `variable_scope` named 'model'
    with tf.variable_scope('model') as model_vs:
        model = OmniAnomaly(config=config, name="model")

        # construct the trainer
        trainer = Trainer(model=model,
                          model_vs=model_vs,
                          max_epoch=config.max_epoch,
                          batch_size=config.batch_size,
                          valid_batch_size=config.test_batch_size,
                          initial_lr=config.initial_lr,
                          lr_anneal_epochs=config.lr_anneal_epoch_freq,
                          lr_anneal_factor=config.lr_anneal_factor,
                          grad_clip_norm=config.gradient_clip_norm,
                          valid_step_freq=config.valid_step_freq)

        # construct the predictor
        predictor = Predictor(model,
                              batch_size=config.batch_size,
                              n_z=config.test_n_z,
                              last_point_only=True)

        with tf.Session().as_default():
            if config.restore_dir is not None:
                # Restore variables from `save_dir`.
                saver = VariableSaver(get_variables_as_dict(model_vs),
                                      config.restore_dir)
                saver.restore()

            if config.max_epoch > 0:
                # train the model
                train_start = time.time()
                best_valid_metrics = trainer.fit(x_train)
                train_time = (time.time() - train_start) / config.max_epoch
                best_valid_metrics.update({'train_time': train_time})
            else:
                best_valid_metrics = {}

            # get score of train set for POT algorithm
            train_score, train_z, train_pred_speed = predictor.get_score(
                x_train)
            if config.train_score_filename is not None:
                with open(
                        os.path.join(config.result_dir,
                                     config.train_score_filename),
                        'wb') as file:
                    pickle.dump(train_score, file)
            if config.save_z:
                save_z(train_z, 'train_z')

            if x_test is not None:
                # get score of test set
                test_start = time.time()
                test_score, test_z, pred_speed = predictor.get_score(x_test)
                test_time = time.time() - test_start
                if config.save_z:
                    save_z(test_z, 'test_z')
                best_valid_metrics.update({
                    'pred_time': pred_speed,
                    'pred_total_time': test_time
                })
                if config.test_score_filename is not None:
                    with open(
                            os.path.join(config.result_dir,
                                         config.test_score_filename),
                            'wb') as file:
                        pickle.dump(test_score, file)

                if y_test is not None and len(y_test) >= len(test_score):
                    if config.get_score_on_dim:
                        # get the joint score
                        test_score = np.sum(test_score, axis=-1)
                        train_score = np.sum(train_score, axis=-1)

                    # get best f1
                    t, th = bf_search(
                        test_score,
                        y_test[-len(test_score):],
                        start=config.bf_search_min,
                        end=config.bf_search_max,
                        step_num=int(
                            abs(config.bf_search_max - config.bf_search_min)
                            / config.bf_search_step_size),
                        display_freq=50)
                    # get pot results
                    pot_result = pot_eval(train_score,
                                          test_score,
                                          y_test[-len(test_score):],
                                          level=config.level)

                    # output the results
                    best_valid_metrics.update({
                        'best-f1': t[0],
                        'precision': t[1],
                        'recall': t[2],
                        'TP': t[3],
                        'TN': t[4],
                        'FP': t[5],
                        'FN': t[6],
                        'latency': t[-1],
                        'threshold': th
                    })
                    best_valid_metrics.update(pot_result)
                    results.update_metrics(best_valid_metrics)

            if config.save_dir is not None:
                # save the variables
                var_dict = get_variables_as_dict(model_vs)
                saver = VariableSaver(var_dict, config.save_dir)
                saver.save()

    print('=' * 30 + 'result' + '=' * 30)
    pprint(best_valid_metrics)
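
# A sketch of the command-line driver this variant assumes, modeled on the
# OmniAnomaly reference script. `ExpConfig` (the script's tfsnippet `Config`
# subclass) and the exact tfsnippet API surface are assumptions here, not
# confirmed by this file.
if __name__ == '__main__':
    import sys
    import warnings
    from argparse import ArgumentParser

    from tfsnippet.examples.utils import MLResults
    from tfsnippet.utils import register_config_arguments

    config = ExpConfig()

    # expose every config field as a --flag and parse the command line
    arg_parser = ArgumentParser()
    register_config_arguments(config, arg_parser)
    arg_parser.parse_args(sys.argv[1:])
    config.x_dim = get_data_dim(config.dataset)

    # prepare result directories and the `results` object used by `main`
    results = MLResults(config.result_dir)
    results.save_config(config)
    results.make_dirs(config.save_dir, exist_ok=True)
    with warnings.catch_warnings():
        # silence NumPy DeprecationWarnings triggered inside TF-Probability
        warnings.filterwarnings('ignore', category=DeprecationWarning,
                                module='numpy')
        main()
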
# Variant 3: trains one model jointly on several datasets (machines) and
# scores each machine separately.
def main():
    # pin the visible GPU(s) before TensorFlow initializes
    if config.GPU_device_number != "-1":
        os.environ["CUDA_VISIBLE_DEVICES"] = config.GPU_device_number
    logging.basicConfig(
        level='INFO',
        format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')

    save_z_flag = int(config.save_z)
    get_score_flag = int(config.get_score_for_each_machine_flag)
    # split the comma-separated list of variable name patterns, if given
    config.untrainable_variables_keyvalues = (
        config.untrainable_variables_keyvalues.replace(" ", '').split(',')
        if config.untrainable_variables_keyvalues is not None else None)
    # comma-separated list of datasets/machines to train on jointly
    dataset_list = (config.dataset.replace(" ", '')).split(',')
    # by default, sample each dataset in inverse proportion to their count
    config.sample_ratio = (1.0 / len(dataset_list)
                           if config.sample_ratio is None
                           else config.sample_ratio)
    config.x_dim = get_data_dim(dataset_list)

    # prepare the data
    (x_train_list, _), (x_test_list, y_test_list) = \
        get_data(dataset_list, config.max_train_size, config.max_test_size,
                 train_start=config.train_start, test_start=config.test_start)

    # construct the model under `variable_scope` named after `save_dir`
    with tf.variable_scope(config.save_dir) as model_vs:
        model = OmniAnomaly(config=config, name=config.save_dir)

        # construct the trainer
        trainer = Trainer(model=model,
                          model_vs=model_vs,
                          max_epoch=config.max_epoch,
                          batch_size=config.batch_size,
                          valid_batch_size=config.test_batch_size,
                          initial_lr=config.initial_lr,
                          lr_anneal_epochs=config.lr_anneal_epoch_freq,
                          lr_anneal_factor=config.lr_anneal_factor,
                          grad_clip_norm=config.gradient_clip_norm,
                          valid_step_freq=config.valid_step_freq,
                          untrainable_variables_keyvalues=(
                              config.untrainable_variables_keyvalues))

        # construct the predictor
        predictor = Predictor(model,
                              batch_size=config.batch_size,
                              n_z=config.test_n_z,
                              last_point_only=True)

        with tf.Session().as_default():
            if config.restore_dir is not None:
                # Restore variables from `save_dir`.
                saver = VariableSaver(get_variables_as_dict(model_vs),
                                      config.restore_dir)
                saver.restore()

            if config.max_epoch > 0:
                # train the model
                train_start = time.time()
                best_valid_metrics = trainer.fit(
                    x_train_list, sample_ratio=config.sample_ratio)
                # average training time per epoch
                train_time = (time.time() - train_start) / config.max_epoch
                best_valid_metrics.update({'train_time': train_time})
            else:
                best_valid_metrics = {}

            # get score of train set for POT algorithm
            if get_score_flag:
                for ds, x_train, x_test, y_test in zip(
                        dataset_list, x_train_list, x_test_list, y_test_list):
                    train_score, train_z, train_pred_speed = \
                        predictor.get_score(x_train)
                    if config.train_score_filename is not None:
                        with open(
                                os.path.join(
                                    config.result_dir,
                                    f'{ds}-{config.train_score_filename}'),
                                'wb') as file:
                            pickle.dump(train_score, file)
                    if save_z_flag:
                        save_z(train_z,
                               os.path.join(config.result_dir,
                                            f'{ds}-train_z'))

                    # get score of test set
                    test_start = time.time()
                    test_score, test_z, pred_speed = predictor.get_score(
                        x_test)
                    test_time = time.time() - test_start
                    if config.test_score_filename is not None:
                        with open(
                                os.path.join(
                                    config.result_dir,
                                    f'{ds}-{config.test_score_filename}'),
                                'wb') as file:
                            pickle.dump(test_score, file)
                    if save_z_flag:
                        save_z(test_z,
                               os.path.join(config.result_dir,
                                            f'{ds}-test_z'))

                    if y_test is not None and len(y_test) >= len(test_score):
                        if config.get_score_on_dim:
                            # get the joint score
                            test_score = np.sum(test_score, axis=-1)
                            train_score = np.sum(train_score, axis=-1)

                        # get best f1
                        t, th = bf_search(
                            test_score,
                            y_test[-len(test_score):],
                            start=config.bf_search_min,
                            end=config.bf_search_max,
                            step_num=int(
                                abs(config.bf_search_max -
                                    config.bf_search_min)
                                / config.bf_search_step_size),
                            display_freq=50)
                        # get pot results
                        pot_result = pot_eval(train_score,
                                              test_score,
                                              y_test[-len(test_score):],
                                              level=config.level)

                        result_dict = {
                            'pred_time': pred_speed,
                            'pred_total_time': test_time,
                            'best-f1': t[0],
                            'precision': t[1],
                            'recall': t[2],
                            'TP': t[3],
                            'TN': t[4],
                            'FP': t[5],
                            'FN': t[6],
                            'latency': t[-1],
                            'threshold': th
                        }
                        for pot_key, pot_value in pot_result.items():
                            result_dict[pot_key] = pot_value
                        # NOTE: despite the '.json' suffix, this file is a
                        # pickle and must be read back with `pickle.load`.
                        with open(
                                os.path.join(config.result_dir,
                                             f'{ds}-result.json'),
                                'wb') as file:
                            pickle.dump(result_dict, file)

            if config.save_dir is not None:
                # save the variables
                var_dict = get_variables_as_dict(model_vs)
                saver = VariableSaver(var_dict, config.save_dir)
                saver.save()

    print('=' * 30 + 'result' + '=' * 30)
    pprint(best_valid_metrics)
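
# A small read-back sketch (not part of the original script): because the
# per-dataset result files above are pickled despite their '.json' suffix,
# they need `pickle.load` rather than the `json` module. `load_results` is
# a hypothetical helper name; it relies on the `os`/`pickle` imports at the
# top of this file.
def load_results(result_dir, dataset_list):
    """Return {dataset: result_dict} for the files written by `main`."""
    results_by_ds = {}
    for ds in dataset_list:
        with open(os.path.join(result_dir, f'{ds}-result.json'), 'rb') as f:
            results_by_ds[ds] = pickle.load(f)
    return results_by_ds
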