def test_variable_reuse(self):
    # Two `Donut` objects produced by the same `global_reuse`-decorated
    # factory must live in the same variable scope and must not create a
    # second set of variables, however often `get_score` is called.

    @global_reuse
    def get_donut():
        return Donut(
            x_dims=5,
            z_dims=3,
            h_for_p_x=lambda x: x,
            h_for_q_z=lambda x: x,
        )

    tf.set_random_seed(1234)
    first = get_donut()
    second = get_donut()
    self.assertEqual(first.variable_scope.name, second.variable_scope.name)

    # build the scoring graph twice on each instance
    inputs = tf.reshape(tf.range(20, dtype=tf.float32), [4, 5])
    for donut in (first, first, second, second):
        _ = donut.get_score(inputs)

    # exactly one copy of every dense-layer variable is expected
    expected_names = [
        'get_donut/donut/p_x_given_z/x_mean/bias',
        'get_donut/donut/p_x_given_z/x_mean/kernel',
        'get_donut/donut/p_x_given_z/x_std/bias',
        'get_donut/donut/p_x_given_z/x_std/kernel',
        'get_donut/donut/q_z_given_x/z_mean/bias',
        'get_donut/donut/q_z_given_x/z_mean/kernel',
        'get_donut/donut/q_z_given_x/z_std/bias',
        'get_donut/donut/q_z_given_x/z_std/kernel',
    ]
    self.assertListEqual(sorted(get_variables_as_dict()), expected_names)
def fit(self, iterator):
    """Train the model on `iterator`, optionally restoring/saving variables.

    A fresh session is opened for the duration of the call.  If
    `self.config.restore_dir` is set, the variables under the "model" scope
    are restored from it before training; if `self.config.save_dir` is set,
    they are saved to it after training.  The value stored under
    ``"total_train_time"`` in the metrics returned by `self.trainer.fit`
    is recorded in ``self.time_tracker["train"]``.

    Parameters
    ----------
    iterator
        Training data, passed unchanged to `self.trainer.fit`.
    """
    tf_config = tf.ConfigProto(allow_soft_placement=True)
    tf_config.gpu_options.allow_growth = True

    with tf.variable_scope("model") as model_vs:
        # Entering the session itself (instead of `.as_default()`) installs
        # it as the default session *and* closes it on exit, so its
        # resources are released deterministically.
        with tf.Session(config=tf_config):
            if self.config.restore_dir is not None:
                # Restore variables from `restore_dir`.
                saver = VariableSaver(get_variables_as_dict(model_vs),
                                      self.config.restore_dir)
                saver.restore()

            best_valid_metrics = self.trainer.fit(iterator)
            self.time_tracker["train"] = best_valid_metrics[
                "total_train_time"]

            if self.config.save_dir is not None:
                # Save the variables to `save_dir`.
                var_dict = get_variables_as_dict(model_vs)
                saver = VariableSaver(var_dict, self.config.save_dir)
                saver.save()
            print("=" * 30 + "result" + "=" * 30)
def predict_prob(self, iterator):
    """Score `iterator` with the predictor and return the scores.

    A fresh session is opened for the duration of the call.  If
    `self.config.save_dir` is set, the variables under the "model" scope
    are restored from it before scoring.  The prediction time reported by
    `self.predictor.get_score` is recorded in ``self.time_tracker["test"]``.

    Parameters
    ----------
    iterator
        Data to score, passed unchanged to `self.predictor.get_score`.

    Returns
    -------
    The first element of the tuple returned by `self.predictor.get_score`.
    """
    tf_config = tf.ConfigProto(allow_soft_placement=True)
    tf_config.gpu_options.allow_growth = True

    with tf.variable_scope("model") as model_vs:
        # Entering the session itself (instead of `.as_default()`) installs
        # it as the default session *and* closes it on exit.
        with tf.Session(config=tf_config):
            if self.config.save_dir is not None:
                # Restore variables from `save_dir`.
                saver = VariableSaver(get_variables_as_dict(model_vs),
                                      self.config.save_dir)
                saver.restore()

            # the second element of the result is not needed here
            score, _, pred_time = self.predictor.get_score(iterator)
            self.time_tracker["test"] = pred_time
            return score
def get_variables(self, collection=tf.GraphKeys.GLOBAL_VARIABLES):
    """Collect the variables that live inside this model's variable scope.

    The model is built first (via `build()`), then every variable that is
    both inside `variable_scope` and a member of `collection` is returned,
    keyed by its relative name, i.e. the variable name with the name of
    `variable_scope` stripped from the front.

    Parameters
    ----------
    collection : str
        Name of the variable collection to look in.
        Defaults to `tf.GraphKeys.GLOBAL_VARIABLES`.

    Returns
    -------
    dict[str, tf.Variable]
        Mapping from relative variable names to variable objects.
    """
    self.build()
    model_scope = self.variable_scope
    return get_variables_as_dict(model_scope, collection=collection)
def get_param_variables(self, collection=tf.GraphKeys.GLOBAL_VARIABLES):
    """Get the parameter variables.

    The parameter variables are the variables defined in the "model"
    sub-scope within the model's variable scope.

    Parameters
    ----------
    collection : str
        The name of the variable collection.
        If not specified, will use `tf.GraphKeys.GLOBAL_VARIABLES`.

    Returns
    -------
    dict[str, tf.Variable]
        Dict which maps from relative names (each prefixed with
        ``'model/'``) to variable objects.
    """
    self.build()

    # Only add the separator when there is a non-empty scope name that does
    # not already end with one.  Appending '/' unconditionally would turn
    # the root scope ('') into '/model/', which matches no variable.
    vs_name = self.variable_scope.name
    if vs_name and not vs_name.endswith('/'):
        vs_name += '/'
    vs_name += 'model/'

    variables = get_variables_as_dict(vs_name, collection=collection)
    return {'model/' + k: v for k, v in six.iteritems(variables)}
def __init__(
    self,
    model,
    model_vs=None,
    n_z=None,
    feed_dict=None,
    valid_feed_dict=None,
    use_regularization_loss=True,
    max_epoch=256,
    max_step=None,
    batch_size=256,
    valid_batch_size=1024,
    valid_step_freq=100,
    initial_lr=0.001,
    lr_anneal_epochs=10,
    lr_anneal_factor=0.75,
    optimizer=tf.train.AdamOptimizer,
    optimizer_params=None,
    grad_clip_norm=50.0,
    check_numerics=True,
    name=None,
    scope=None,
):
    """Store the training settings and build the training graph.

    Parameters
    ----------
    model
        Object providing `window_length`, `x_dims` and
        `get_training_loss(x=..., n_z=...)`.
    model_vs
        Scope handed to `get_variables_as_dict` to select the trainable
        variables that will be optimized.
    n_z
        Forwarded to `model.get_training_loss`.
    feed_dict, valid_feed_dict
        Optional mappings; each is copied into a plain dict.  When
        `valid_feed_dict` is omitted the training feed dict is reused.
    use_regularization_loss : bool
        Whether to add `tf.losses.get_regularization_loss()` to the loss.
    max_epoch, max_step
        Training limits; at least one of them must not be None.
    batch_size, valid_batch_size, valid_step_freq, initial_lr,
    lr_anneal_epochs, lr_anneal_factor
        Stored on the instance; not otherwise used by the constructor.
    optimizer
        Callable invoked as `optimizer(**optimizer_params)`.
    optimizer_params
        Optional keyword arguments for `optimizer`; ``learning_rate`` is
        always overridden with the learning-rate placeholder.
    grad_clip_norm
        If truthy, every gradient is clipped with `tf.clip_by_norm`.
    check_numerics : bool
        If true, every gradient is wrapped in `tf.check_numerics`.
    name, scope
        Forwarded to the base class constructor.

    Raises
    ------
    ValueError
        If both `max_epoch` and `max_step` are None.
    """
    super(Trainer, self).__init__(name=name, scope=scope)

    # keep the constructor arguments
    self._model = model
    self._n_z = n_z
    self._feed_dict = (
        {} if feed_dict is None else dict(six.iteritems(feed_dict))
    )
    self._valid_feed_dict = (
        self._feed_dict
        if valid_feed_dict is None
        else dict(six.iteritems(valid_feed_dict))
    )
    if max_epoch is None and max_step is None:
        raise ValueError(
            "At least one of `max_epoch` and `max_step` "
            "should be specified"
        )
    self._max_epoch = max_epoch
    self._max_step = max_step
    self._batch_size = batch_size
    self._valid_batch_size = valid_batch_size
    self._valid_step_freq = valid_step_freq
    self._initial_lr = initial_lr
    self._lr_anneal_epochs = lr_anneal_epochs
    self._lr_anneal_factor = lr_anneal_factor

    # assemble the training graph inside the trainer's own scope
    with reopen_variable_scope(self.variable_scope):
        # step counter owned by this trainer
        self._global_step = tf.get_variable(
            name="global_step",
            dtype=tf.int64,
            trainable=False,
            initializer=tf.constant(0, dtype=tf.int64),
        )

        # placeholders fed at every training step
        window_shape = [None, model.window_length, model.x_dims]
        self._input_x = tf.placeholder(
            dtype=tf.float32, shape=window_shape, name="input_x"
        )
        self._learning_rate = tf.placeholder(
            dtype=tf.float32, shape=(), name="learning_rate"
        )

        # training loss, optionally with the regularization term
        with tf.name_scope("loss"):
            loss = model.get_training_loss(x=self._input_x, n_z=n_z)
            if use_regularization_loss:
                loss += tf.losses.get_regularization_loss()
            self._loss = loss

        # variables that will be optimized
        self._train_params = get_variables_as_dict(
            scope=model_vs, collection=tf.GraphKeys.TRAINABLE_VARIABLES
        )

        # instantiate the optimizer on a private copy of its parameters
        optimizer_params = (
            {}
            if optimizer_params is None
            else dict(six.iteritems(optimizer_params))
        )
        optimizer_params["learning_rate"] = self._learning_rate
        self._optimizer = optimizer(**optimizer_params)

        # gradients, post-processed one (gradient, variable) pair at a time
        raw_pairs = self._optimizer.compute_gradients(
            self._loss, list(six.itervalues(self._train_params))
        )
        grad_vars = []
        for grad, var in raw_pairs:
            if grad is None or var is None:
                continue
            if grad_clip_norm:
                grad = tf.clip_by_norm(grad, grad_clip_norm)
            if check_numerics:
                message = "gradient for {} has numeric issue".format(var.name)
                grad = tf.check_numerics(grad, message)
            grad_vars.append((grad, var))

        # the train op runs after every op in the UPDATE_OPS collection
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self._train_op = self._optimizer.apply_gradients(
                grad_vars, global_step=self._global_step
            )

        # one histogram summary per optimized variable
        with tf.name_scope("summary"):
            histograms = [
                tf.summary.histogram(v.name.rsplit(":", 1)[0], v)
                for v in six.itervalues(self._train_params)
            ]
            self._summary_op = tf.summary.merge(histograms)

        # initializer for the global variables in the trainer's scope
        own_variables = get_variables_as_dict(
            scope=self.variable_scope,
            collection=tf.GraphKeys.GLOBAL_VARIABLES,
        )
        self._trainer_initializer = tf.variables_initializer(
            list(six.itervalues(own_variables))
        )
def __init__(self, model, model_vs=None, n_z=None, feed_dict=None,
             valid_feed_dict=None, use_regularization_loss=True,
             max_epoch=256, max_step=None, batch_size=256,
             valid_batch_size=1024, valid_step_freq=100, initial_lr=0.001,
             lr_anneal_epochs=10, lr_anneal_factor=0.75,
             optimizer=tf.train.AdamOptimizer, optimizer_params=None,
             grad_clip_norm=50.0, check_numerics=True, name=None, scope=None,
             untrainable_variables_keyvalues=None):
    """Store the training settings and build the training graph.

    Parameters
    ----------
    model
        Object providing `window_length`, `x_dims` and
        `get_training_loss(x=..., n_z=...)`.
    model_vs
        Scope handed to `get_variables_as_dict` to select the trainable
        variables that will be optimized.
    n_z
        Forwarded to `model.get_training_loss`.
    feed_dict, valid_feed_dict
        Optional mappings; each is copied into a plain dict.  When
        `valid_feed_dict` is omitted the training feed dict is reused.
    use_regularization_loss : bool
        Whether to add `tf.losses.get_regularization_loss()` to the loss.
    max_epoch, max_step
        Training limits; at least one of them must not be None.
    batch_size, valid_batch_size, valid_step_freq, initial_lr,
    lr_anneal_epochs, lr_anneal_factor
        Stored on the instance; not otherwise used by the constructor.
    optimizer
        Callable invoked as `optimizer(**optimizer_params)`.
    optimizer_params
        Optional keyword arguments for `optimizer`; ``learning_rate`` is
        always overridden with the learning-rate placeholder.
    grad_clip_norm
        If truthy, every applied gradient is clipped with
        `tf.clip_by_norm`.
    check_numerics : bool
        If true, every applied gradient is wrapped in `tf.check_numerics`.
    name, scope
        Forwarded to the base class constructor.
    untrainable_variables_keyvalues : iterable of str, optional
        Substrings of variable names.  A variable whose name contains any
        of them is left out of the training op, i.e. it is frozen.

    Raises
    ------
    ValueError
        If both `max_epoch` and `max_step` are None.
    """
    super(Trainer, self).__init__(name=name, scope=scope)

    # memorize the arguments
    self._model = model
    self._n_z = n_z
    if feed_dict is not None:
        self._feed_dict = dict(six.iteritems(feed_dict))
    else:
        self._feed_dict = {}
    if valid_feed_dict is not None:
        self._valid_feed_dict = dict(six.iteritems(valid_feed_dict))
    else:
        self._valid_feed_dict = self._feed_dict
    if max_epoch is None and max_step is None:
        raise ValueError('At least one of `max_epoch` and `max_step` '
                         'should be specified')
    self._max_epoch = max_epoch
    self._max_step = max_step
    self._batch_size = batch_size
    self._valid_batch_size = valid_batch_size
    self._valid_step_freq = valid_step_freq
    self._initial_lr = initial_lr
    self._lr_anneal_epochs = lr_anneal_epochs
    self._lr_anneal_factor = lr_anneal_factor

    # name fragments identifying the variables that must stay frozen
    frozen_keys = tuple(untrainable_variables_keyvalues or ())

    # build the trainer
    with reopen_variable_scope(self.variable_scope):
        # the global step for this model
        self._global_step = tf.get_variable(
            dtype=tf.int64, name='global_step', trainable=False,
            initializer=tf.constant(0, dtype=tf.int64))

        # input placeholders
        self._input_x = tf.placeholder(
            dtype=tf.float32,
            shape=[None, model.window_length, model.x_dims],
            name='input_x')
        self._learning_rate = tf.placeholder(
            dtype=tf.float32, shape=(), name='learning_rate')

        # compose the training loss
        with tf.name_scope('loss'):
            loss = model.get_training_loss(x=self._input_x, n_z=n_z)
            if use_regularization_loss:
                loss += tf.losses.get_regularization_loss()
            self._loss = loss

        # get the training variables
        train_params = get_variables_as_dict(
            scope=model_vs, collection=tf.GraphKeys.TRAINABLE_VARIABLES)
        self._train_params = train_params

        # create the optimizer on a private copy of its parameters
        if optimizer_params is None:
            optimizer_params = {}
        else:
            optimizer_params = dict(six.iteritems(optimizer_params))
        optimizer_params['learning_rate'] = self._learning_rate
        self._optimizer = optimizer(**optimizer_params)

        # derive the training gradient
        origin_grad_vars = self._optimizer.compute_gradients(
            self._loss, list(six.itervalues(self._train_params)))

        grad_vars = []
        for grad, var in origin_grad_vars:
            if grad is None or var is None:
                continue
            # Frozen variables are matched directly on their name and
            # skipped before any clip/check op is built for them.
            if any(key in var.name for key in frozen_keys):
                continue
            if grad_clip_norm:
                grad = tf.clip_by_norm(grad, grad_clip_norm)
            if check_numerics:
                grad = tf.check_numerics(
                    grad,
                    'gradient for {} has numeric issue'.format(var.name))
            grad_vars.append((grad, var))

        # build the training op; it runs after the UPDATE_OPS collection
        with tf.control_dependencies(
                tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            self._train_op = self._optimizer.apply_gradients(
                grad_vars, global_step=self._global_step)

        # the training summary in case `summary_dir` is specified
        with tf.name_scope('summary'):
            self._summary_op = tf.summary.merge([
                tf.summary.histogram(v.name.rsplit(':', 1)[0], v)
                for v in six.itervalues(self._train_params)
            ])

        # initializer for the variables
        self._trainer_initializer = tf.variables_initializer(
            list(six.itervalues(
                get_variables_as_dict(
                    scope=self.variable_scope,
                    collection=tf.GraphKeys.GLOBAL_VARIABLES))))
def main():
    """Train and/or evaluate OmniAnomaly as described by the global `config`.

    Loads the data, builds model, trainer and predictor under the 'model'
    variable scope, optionally restores variables from `config.restore_dir`,
    trains when `config.max_epoch > 0`, scores the train and test sets,
    evaluates against the test labels when enough of them are available,
    optionally saves the variables to `config.save_dir`, and prints the
    collected metrics.
    """
    logging.basicConfig(
        format='%(asctime)s [%(levelname)s] %(name)s: %(message)s',
        level='INFO')

    def dump_to_result_dir(obj, filename):
        # pickle `obj` into `config.result_dir` under `filename`
        with open(os.path.join(config.result_dir, filename), 'wb') as out:
            pickle.dump(obj, out)

    # load the data
    (x_train, _), (x_test, y_test) = get_data(
        config.dataset,
        config.max_train_size,
        config.max_test_size,
        train_start=config.train_start,
        test_start=config.test_start)

    # everything below is built under the variable scope named 'model'
    with tf.variable_scope('model') as model_vs:
        model = OmniAnomaly(config=config, name="model")

        trainer = Trainer(
            model=model,
            model_vs=model_vs,
            max_epoch=config.max_epoch,
            batch_size=config.batch_size,
            valid_batch_size=config.test_batch_size,
            initial_lr=config.initial_lr,
            lr_anneal_epochs=config.lr_anneal_epoch_freq,
            lr_anneal_factor=config.lr_anneal_factor,
            grad_clip_norm=config.gradient_clip_norm,
            valid_step_freq=config.valid_step_freq)

        predictor = Predictor(
            model,
            batch_size=config.batch_size,
            n_z=config.test_n_z,
            last_point_only=True)

        with tf.Session().as_default():
            if config.restore_dir is not None:
                # load previously saved variables from `restore_dir`
                restorer = VariableSaver(get_variables_as_dict(model_vs),
                                         config.restore_dir)
                restorer.restore()

            if not config.max_epoch > 0:
                best_valid_metrics = {}
            else:
                # train, and record the average wall time per epoch
                started = time.time()
                best_valid_metrics = trainer.fit(x_train)
                per_epoch = (time.time() - started) / config.max_epoch
                best_valid_metrics.update({'train_time': per_epoch})

            # train-set scores are needed by the POT evaluation
            train_score, train_z, train_pred_speed = predictor.get_score(
                x_train)
            if config.train_score_filename is not None:
                dump_to_result_dir(train_score, config.train_score_filename)
            if config.save_z:
                save_z(train_z, 'train_z')

            if x_test is not None:
                # score the test set and time it
                started = time.time()
                test_score, test_z, pred_speed = predictor.get_score(x_test)
                elapsed = time.time() - started
                if config.save_z:
                    save_z(test_z, 'test_z')
                best_valid_metrics.update({
                    'pred_time': pred_speed,
                    'pred_total_time': elapsed
                })
                if config.test_score_filename is not None:
                    dump_to_result_dir(test_score,
                                       config.test_score_filename)

                if y_test is not None and len(y_test) >= len(test_score):
                    if config.get_score_on_dim:
                        # collapse per-dimension scores into a joint score
                        test_score = np.sum(test_score, axis=-1)
                        train_score = np.sum(train_score, axis=-1)

                    # brute-force search for the best-F1 threshold
                    search_span = abs(config.bf_search_max -
                                      config.bf_search_min)
                    t, th = bf_search(
                        test_score,
                        y_test[-len(test_score):],
                        start=config.bf_search_min,
                        end=config.bf_search_max,
                        step_num=int(search_span /
                                     config.bf_search_step_size),
                        display_freq=50)

                    # POT-based evaluation
                    pot_result = pot_eval(
                        train_score,
                        test_score,
                        y_test[-len(test_score):],
                        level=config.level)

                    # collect the results
                    best_valid_metrics.update({
                        'best-f1': t[0],
                        'precision': t[1],
                        'recall': t[2],
                        'TP': t[3],
                        'TN': t[4],
                        'FP': t[5],
                        'FN': t[6],
                        'latency': t[-1],
                        'threshold': th
                    })
                    best_valid_metrics.update(pot_result)
                results.update_metrics(best_valid_metrics)

            if config.save_dir is not None:
                # persist the variables to `save_dir`
                saver = VariableSaver(get_variables_as_dict(model_vs),
                                      config.save_dir)
                saver.save()
            print('=' * 30 + 'result' + '=' * 30)
            pprint(best_valid_metrics)
def main():
    """Train OmniAnomaly on several datasets as described by `config`.

    Parses the comma-separated options in `config`, loads the data
    according to `config.get_file_way`, builds model, trainer and predictor
    under a variable scope named after `config.restore_dir` (or
    `config.save_dir` when no restore directory is given), optionally
    restores variables, trains when `config.max_epoch > 0`, optionally
    scores every training set, and optionally saves the variables.
    """
    if config.GPU_device_number != "-1":
        os.environ["CUDA_VISIBLE_DEVICES"] = config.GPU_device_number
    logging.basicConfig(
        level='INFO',
        format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')

    # comma-separated options -> lists (None stays None)
    new_untrainable_variables_keyvalues = (
        config.untrainable_variables_keyvalues.replace(" ", '').split(',')
        if config.untrainable_variables_keyvalues is not None else None)
    dataset_list = config.dataset.replace(" ", '').split(',')
    index_list = (
        [int(i) for i in config.index.replace(" ", '').split(',')]
        if config.index is not None else None)
    config.x_dim = get_data_dim(dataset_list)

    # prepare the data
    if config.get_file_way == 'pkl':
        (x_train_list, train_timestamp_list, _), \
            (x_test_list, test_timestamp_list, y_test_list), KPI_list = \
            get_data(dataset_list, method=config.get_file_way)
    if 'flow' in config.get_file_way:
        (x_train_list, train_timestamp_list, _), \
            (x_test_list, test_timestamp_list, y_test_list), KPI_list = \
            get_data(dataset_list,
                     start_time=config.get_data_start_time,
                     last_time=config.get_data_last_time,
                     sample_ratio=config.get_data_sample_ratio,
                     method=config.get_file_way,
                     average_flag=config.average_flag,
                     number_list=index_list,
                     result_dir=config.result_dir)

    # The variable scope (and the model) is named after the directory the
    # variables come from: `restore_dir` if given, otherwise `save_dir`.
    scope_name = (config.restore_dir
                  if config.restore_dir is not None else config.save_dir)
    with tf.variable_scope(scope_name) as model_vs:
        model = OmniAnomaly(config=config, name=scope_name)

        # construct the trainer
        trainer = Trainer(
            model=model,
            model_vs=model_vs,
            max_epoch=config.max_epoch,
            batch_size=config.batch_size,
            valid_batch_size=config.test_batch_size,
            initial_lr=config.initial_lr,
            lr_anneal_epochs=config.lr_anneal_epoch_freq,
            lr_anneal_factor=config.lr_anneal_factor,
            grad_clip_norm=config.gradient_clip_norm,
            valid_step_freq=config.valid_step_freq,
            untrainable_variables_keyvalues=new_untrainable_variables_keyvalues
        )

        # construct the predictor
        predictor = Predictor(model,
                              batch_size=config.batch_size,
                              n_z=config.test_n_z,
                              last_point_only=True)

        with tf.Session().as_default():
            if config.restore_dir is not None:
                # Restore variables from `restore_dir`.
                saver = VariableSaver(get_variables_as_dict(model_vs),
                                      config.restore_dir)
                saver.restore()

            if config.max_epoch > 0:
                # train the model; record the average wall time per epoch
                train_start = time.time()
                best_valid_metrics = trainer.fit(x_train_list,
                                                 valid_portion=0.1)
                train_time = (time.time() - train_start) / config.max_epoch
                best_valid_metrics.update({'train_time': train_time})
            else:
                best_valid_metrics = {}

            # get score of train set for POT algorithm
            if config.get_score_for_each_machine_flag:
                if config.get_file_way == 'train_flow':
                    st = time.time()
                    for ds, x_train, _ in zip(dataset_list, x_train_list,
                                              train_timestamp_list):
                        train_score, train_z, train_pred_speed = \
                            predictor.get_score(
                                x_train, sample_ratio=config.sample_z_ratio)
                        if config.train_score_filename is not None:
                            with open(
                                    os.path.join(
                                        config.result_dir,
                                        f'{ds}-{config.train_score_filename}'),
                                    'wb') as out:
                                pickle.dump(train_score, out)
                        if config.save_z:
                            save_z(
                                train_z,
                                os.path.join(config.result_dir,
                                             f'{ds}-train_z'))
                    print(
                        f'testing {len(dataset_list)} machine entities cost {time.time() - st}'
                    )

            # Logical `and`, not bitwise `&`: with `&` a truthy but
            # non-bool flag (e.g. 2, or a string) gives a wrong result or
            # raises TypeError.
            if config.save_dir is not None and config.save_model_flag:
                # save the variables
                var_dict = get_variables_as_dict(model_vs)
                if config.restore_dir is not None:
                    # rename the keys so they refer to `save_dir`
                    var_dict = {
                        k.replace(config.restore_dir, config.save_dir): i
                        for k, i in var_dict.items()
                    }
                saver = VariableSaver(var_dict, config.save_dir)
                saver.save()
            print('=' * 30 + 'result' + '=' * 30)
            pprint(best_valid_metrics)
def test_get_variables_as_dict(self):
    # Checks `get_variables_as_dict` for every supported way of naming the
    # scope (default, '', name, name with trailing slash, scope object,
    # unknown name) combined with different variable collections.
    keys = tf.GraphKeys

    # variables in the root scope
    a = tf.get_variable(
        'a', shape=(),
        collections=[keys.GLOBAL_VARIABLES, keys.MODEL_VARIABLES])
    b = tf.get_variable('b', shape=(), collections=[keys.GLOBAL_VARIABLES])
    c = tf.get_variable('c', shape=(), collections=[keys.MODEL_VARIABLES])

    # the same three variables once more, inside the 'child' scope
    with tf.variable_scope('child') as child:
        child_a = tf.get_variable(
            'a', shape=(),
            collections=[keys.GLOBAL_VARIABLES, keys.MODEL_VARIABLES])
        child_b = tf.get_variable(
            'b', shape=(), collections=[keys.GLOBAL_VARIABLES])
        child_c = tf.get_variable(
            'c', shape=(), collections=[keys.MODEL_VARIABLES])

    root_global = {
        'a': a, 'b': b, 'child/a': child_a, 'child/b': child_b
    }
    root_model = {
        'a': a, 'c': c, 'child/a': child_a, 'child/c': child_c
    }
    child_global = {'a': child_a, 'b': child_b}
    child_model = {'a': child_a, 'c': child_c}

    # root scope, selected implicitly
    self.assertEqual(get_variables_as_dict(), root_global)
    self.assertEqual(
        get_variables_as_dict(collection=keys.MODEL_VARIABLES), root_model)
    self.assertEqual(
        get_variables_as_dict(collection=keys.LOCAL_VARIABLES), {})

    # root scope, selected by the empty name
    self.assertEqual(get_variables_as_dict(''), root_global)

    # child scope, selected by name, by name with slash, and by object
    self.assertEqual(get_variables_as_dict('child'), child_global)
    self.assertEqual(get_variables_as_dict('child/'), child_global)
    self.assertEqual(get_variables_as_dict(child), child_global)

    # child scope with other collections
    self.assertEqual(
        get_variables_as_dict('child', collection=keys.MODEL_VARIABLES),
        child_model)
    self.assertEqual(
        get_variables_as_dict('child', collection=keys.LOCAL_VARIABLES),
        {})

    # a scope that does not exist
    self.assertEqual(get_variables_as_dict('non_exist'), {})
def main(dataset, subdataset):
    """Train and evaluate OmniAnomaly on one (dataset, subdataset) pair.

    The default graph is reset first, so the function can be called once
    per dataset.  Model, trainer and predictor are built under the 'model'
    variable scope; variables are optionally restored from
    `config.restore_dir`; training runs when `config.max_epoch > 0`; train
    and test sets are scored; results are evaluated against the test
    labels when enough of them are available; variables are optionally
    saved to `config.save_dir`.

    Parameters
    ----------
    dataset, subdataset
        Passed unchanged to `load_dataset`.

    Returns
    -------
    The metrics collected during training and evaluation (an empty dict
    plus evaluation entries when no training is performed).
    """
    logging.basicConfig(
        level="INFO",
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s")

    (x_train, _), (x_test, y_test) = load_dataset(dataset, subdataset)

    tf.reset_default_graph()

    # construct the model under `variable_scope` named 'model'
    with tf.variable_scope("model") as model_vs:
        model = OmniAnomaly(config=config, name="model")

        # construct the trainer
        trainer = Trainer(
            model=model,
            model_vs=model_vs,
            max_epoch=config.max_epoch,
            batch_size=config.batch_size,
            valid_batch_size=config.test_batch_size,
            initial_lr=config.initial_lr,
            lr_anneal_epochs=config.lr_anneal_epoch_freq,
            lr_anneal_factor=config.lr_anneal_factor,
            grad_clip_norm=config.gradient_clip_norm,
            valid_step_freq=config.valid_step_freq,
        )

        # construct the predictor
        predictor = Predictor(
            model,
            batch_size=config.batch_size,
            n_z=config.test_n_z,
            last_point_only=True,
        )

        # Entering the session itself (instead of `.as_default()`) installs
        # it as the default session *and* closes it on exit, so repeated
        # calls do not leave sessions of discarded graphs open.
        with tf.Session():
            if config.restore_dir is not None:
                # Restore variables from `restore_dir`.
                saver = VariableSaver(get_variables_as_dict(model_vs),
                                      config.restore_dir)
                saver.restore()

            if config.max_epoch > 0:
                # train the model
                best_valid_metrics = trainer.fit(x_train)
            else:
                best_valid_metrics = {}

            # get score of train set for POT algorithm
            train_score, train_z, train_pred_speed = predictor.get_score(
                x_train)
            if config.train_score_filename is not None:
                with open(
                        os.path.join(config.result_dir,
                                     config.train_score_filename),
                        "wb") as out:
                    pickle.dump(train_score, out)
            if config.save_z:
                save_z(train_z, "train_z")

            if x_test is not None:
                # get score of test set
                test_start = time.time()
                test_score, test_z, pred_speed = predictor.get_score(x_test)
                test_time = time.time() - test_start
                if config.save_z:
                    save_z(test_z, "test_z")
                best_valid_metrics.update({
                    "pred_time": pred_speed,
                    "pred_total_time": test_time
                })
                if config.test_score_filename is not None:
                    with open(
                            os.path.join(config.result_dir,
                                         config.test_score_filename),
                            "wb") as out:
                        pickle.dump(test_score, out)

                if y_test is not None and len(y_test) >= len(test_score):
                    if config.get_score_on_dim:
                        # get the joint score
                        test_score = np.sum(test_score, axis=-1)
                        train_score = np.sum(train_score, axis=-1)

                    # get best f1
                    t, th = bf_search(
                        test_score,
                        y_test[-len(test_score):],
                        start=config.bf_search_min,
                        end=config.bf_search_max,
                        step_num=int(
                            abs(config.bf_search_max - config.bf_search_min)
                            / config.bf_search_step_size),
                        display_freq=50,
                    )

                    # get pot results
                    pot_result = pot_eval(
                        train_score,
                        test_score,
                        y_test[-len(test_score):],
                        level=config.level,
                    )

                    # output the results
                    best_valid_metrics.update({
                        "best-f1": t[0],
                        "precision": t[1],
                        "recall": t[2],
                        "TP": t[3],
                        "TN": t[4],
                        "FP": t[5],
                        "FN": t[6],
                        "latency": t[-1],
                        "threshold": th,
                        "test_score": test_score,
                        "labels": y_test[-len(test_score):],
                    })
                    best_valid_metrics.update(pot_result)
                results.update_metrics(best_valid_metrics)

            if config.save_dir is not None:
                # save the variables
                var_dict = get_variables_as_dict(model_vs)
                saver = VariableSaver(var_dict, config.save_dir)
                saver.save()
            print("=" * 30 + "result" + "=" * 30)
            pprint(best_valid_metrics)
    return best_valid_metrics
kernel_regularizer=K.regularizers.l2(0.001), activation=tf.nn.relu), K.layers.Dense(100, kernel_regularizer=K.regularizers.l2(0.001), activation=tf.nn.relu), ]), x_dims=120, z_dims=5, ) # To train the Donut model, and use a trained model for prediction trainer = DonutTrainer(model=model, model_vs=model_vs) predictor = DonutPredictor(model) with tf.Session().as_default(): #trainer.fit(train_values, train_labels, train_missing, mean, std) #var_dict = get_variables_as_dict(model_vs) #saver = VariableSaver(var_dict, "donut_without_label_2.ckpt") #saver.save() # Restore variables from `save_dir`. saver = VariableSaver(get_variables_as_dict(model_vs), "donut_without_label_2.ckpt") saver.restore() test_score = predictor.get_score(test_values, test_missing) result = np.array([test_labels[119:], test_score]) np.savetxt('result_arti_sin2.csv', result.transpose(), delimiter=',', fmt='%.3f')
marker='^', color='green', label="Non Anomalies") plt.legend(['Non Anomalies']) plt.xlim(df3['timestamp'].min(), df3['timestamp'].max()) plt.ylim(-.0006, .0006) plt.title('Non Anomalies from Fetal Brain Scan') plt.ylabel('# Direct_1') plt.xlabel('Date-Time') plt.savefig('figs/out_brain_nanomaly.png') from tfsnippet.utils import get_variables_as_dict, VariableSaver session = K.backend.get_session() init = tf.global_variables_initializer() session.run(init) with session.as_default(): var_dict = get_variables_as_dict(model_vs) # save variables to `save_dir` saver = VariableSaver(var_dict, save_dir) saver.save() print("Saved the model successfully") with session.as_default(): # Restore the model. saver = VariableSaver(get_variables_as_dict(model_vs), save_dir) saver.restore() print("Restored the model successfully")
def __init__(self, model, model_vs=None, n_z=None, feed_dict=None,
             valid_feed_dict=None, missing_data_injection_rate=0.01,
             use_regularization_loss=True, max_epoch=256, max_step=None,
             batch_size=256, valid_batch_size=1024, valid_step_freq=100,
             initial_lr=0.001, lr_anneal_epochs=10, lr_anneal_factor=0.75,
             optimizer=tf.train.AdamOptimizer, optimizer_params=None,
             grad_clip_norm=10.0, check_numerics=True, name=None, scope=None):
    """Store the training settings and build the training graph.

    Parameters
    ----------
    model
        Object providing `x_dims` and
        `get_training_loss(x=..., y=..., n_z=...)`.
    model_vs
        Scope handed to `get_variables_as_dict` to select the trainable
        variables that will be optimized.
    n_z
        Forwarded to `model.get_training_loss`.
    feed_dict, valid_feed_dict
        Optional mappings; each is copied into a plain dict.  When
        `valid_feed_dict` is omitted the training feed dict is reused.
    missing_data_injection_rate
        Stored on the instance; not otherwise used by the constructor.
    use_regularization_loss : bool
        Whether to add `tf.losses.get_regularization_loss()` to the loss.
    max_epoch, max_step
        Training limits; at least one of them must not be None.
    batch_size, valid_batch_size, valid_step_freq, initial_lr,
    lr_anneal_epochs, lr_anneal_factor
        Stored on the instance; not otherwise used by the constructor.
    optimizer
        Callable invoked as `optimizer(**optimizer_params)`.
    optimizer_params
        Optional keyword arguments for `optimizer`; ``learning_rate`` is
        always overridden with the learning-rate placeholder.
    grad_clip_norm
        If truthy, every gradient is clipped with `tf.clip_by_norm`.
    check_numerics : bool
        If true, every gradient is wrapped in `tf.check_numerics`.
    name, scope
        Forwarded to the base class constructor.

    Raises
    ------
    ValueError
        If both `max_epoch` and `max_step` are None.
    """
    super(DonutTrainer, self).__init__(name=name, scope=scope)

    # memorize the arguments
    self._model = model
    self._n_z = n_z
    if feed_dict is not None:
        # copy the given mapping into a plain dict
        self._feed_dict = dict(six.iteritems(feed_dict))
    else:
        self._feed_dict = {}
    if valid_feed_dict is not None:
        self._valid_feed_dict = dict(six.iteritems(valid_feed_dict))
    else:
        # fall back to the training feed dict when none is given
        self._valid_feed_dict = self._feed_dict
    self._missing_data_injection_rate = missing_data_injection_rate
    # a training limit is mandatory
    if max_epoch is None and max_step is None:
        raise ValueError('`max_epoch`和`max_step`至少有一个被指定')
    self._max_epoch = max_epoch
    self._max_step = max_step
    self._batch_size = batch_size
    self._valid_batch_size = valid_batch_size
    self._valid_step_freq = valid_step_freq
    self._initial_lr = initial_lr
    self._lr_anneal_epochs = lr_anneal_epochs
    self._lr_anneal_factor = lr_anneal_factor

    # build the trainer
    with reopen_variable_scope(self.variable_scope):
        # input placeholders: x and y both have `model.x_dims` columns,
        # the learning rate is a scalar
        self._input_x = tf.placeholder(dtype=tf.float32,
                                       shape=[None, model.x_dims],
                                       name='input_x')
        self._input_y = tf.placeholder(dtype=tf.int32,
                                       shape=[None, model.x_dims],
                                       name='input_y')
        self._learning_rate = tf.placeholder(dtype=tf.float32,
                                             shape=(),
                                             name='learning_rate')

        # compose the training loss
        with tf.name_scope('loss'):
            loss = model.get_training_loss(x=self._input_x,
                                           y=self._input_y,
                                           n_z=n_z)
            if use_regularization_loss:
                loss += tf.losses.get_regularization_loss()
            self._loss = loss

        # get the training variables
        train_params = get_variables_as_dict(
            scope=model_vs, collection=tf.GraphKeys.TRAINABLE_VARIABLES)
        self._train_params = train_params

        # create the optimizer on a private copy of its parameters
        if optimizer_params is None:
            optimizer_params = {}
        else:
            optimizer_params = dict(six.iteritems(optimizer_params))
        optimizer_params['learning_rate'] = self._learning_rate
        # by default this is the optimizer implementing the Adam algorithm
        self._optimizer = optimizer(**optimizer_params)

        # derive the training gradients: compute the gradient of the loss
        # with respect to the listed variables.
        # This is the first half of `minimize()`; it returns a list of
        # (gradient, variable) tuples.
        origin_grad_vars = self._optimizer.compute_gradients(
            self._loss, list(six.itervalues(self._train_params)))
        grad_vars = []
        for grad, var in origin_grad_vars:
            if grad is not None and var is not None:
                if grad_clip_norm:
                    # clip the tensor values to a maximum L2 norm
                    grad = tf.clip_by_norm(grad, grad_clip_norm)
                if check_numerics:
                    # check the tensor for NaN and Inf values
                    grad = tf.check_numerics(
                        grad, 'gradient for {} has numeric issue'.format(
                            var.name))
                grad_vars.append((grad, var))

        # build the training op
        # the global step of the model, initialized from a constant
        self._global_step = tf.get_variable(dtype=tf.int64,
                                            name='global_step',
                                            trainable=False,
                                            initializer=tf.constant(
                                                0, dtype=tf.int64))
        # ops created inside this block run only after the ops passed as
        # arguments, i.e. after everything in the UPDATE_OPS collection
        with tf.control_dependencies(
                tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            # apply the computed gradients to the variables; this is the
            # second half of `minimize()`, it returns the op applying the
            # gradients and increments `global_step`
            self._train_op = self._optimizer.apply_gradients(
                grad_vars, global_step=self._global_step)

        # the training summary in case `summary_dir` is specified
        with tf.name_scope('summary'):
            self._summary_op = tf.summary.merge([
                tf.summary.histogram(v.name.rsplit(':', 1)[0], v)
                for v in six.itervalues(self._train_params)
            ])

        # initializer for the variables
        self._trainer_initializer = tf.variables_initializer(
            list(six.itervalues(self.get_variables_as_dict())))
def main():
    """Score sliding windows of test data with a restored OmniAnomaly model.

    Builds model, trainer and predictor under a variable scope named after
    `config.save_dir`, restores the model variables from `config.save_dir`
    and, when `config.get_score_for_each_machine_flag` is set and
    `config.get_file_way == 'test_flow'`, scores one window of test data
    per timestamp and pickles ``[dataset_list, test_score, test_label]``
    into `config.result_dir`.
    """
    if config.GPU_device_number != "-1":
        os.environ["CUDA_VISIBLE_DEVICES"] = config.GPU_device_number
    logging.basicConfig(
        level='INFO',
        format='%(asctime)s [%(levelname)s] %(name)s: %(message)s')
    # comma-separated options -> lists (a missing index stays None)
    dataset_list = (config.dataset.replace(" ", '')).split(',')
    index_list = [int(i) for i in (config.index.replace(" ", '')).split(',')
                  ] if config.index is not None else None
    config.x_dim = get_data_dim(dataset_list)

    # construct the model under a `variable_scope` named after `save_dir`
    with tf.variable_scope(config.save_dir) as model_vs:
        model = OmniAnomaly(config=config, name=config.save_dir)

        # construct the trainer
        # NOTE(review): `trainer` is never used below; presumably it is
        # built only so the graph matches the one used for training —
        # confirm before removing.
        trainer = Trainer(
            model=model,
            model_vs=model_vs,
            max_epoch=config.max_epoch,
            batch_size=config.batch_size,
            valid_batch_size=config.test_batch_size,
            initial_lr=config.initial_lr,
            lr_anneal_epochs=config.lr_anneal_epoch_freq,
            lr_anneal_factor=config.lr_anneal_factor,
            grad_clip_norm=config.gradient_clip_norm,
            valid_step_freq=config.valid_step_freq,
            untrainable_variables_keyvalues=["rnn_p_x", "rnn_q_z"],
        )

        # construct the predictor
        predictor = Predictor(model,
                              batch_size=config.batch_size,
                              n_z=config.test_n_z,
                              last_point_only=True)

        with tf.Session().as_default():
            # Restore variables from `save_dir`.
            saver = VariableSaver(get_variables_as_dict(model_vs),
                                  config.save_dir)
            saver.restore()

            # score the test data, one window per timestamp
            if config.get_score_for_each_machine_flag:
                if config.get_file_way == 'test_flow':
                    (x_train_list, train_timestamp_list, _), (x_test_list, test_timestamp_list, y_test_list), _ = \
                        get_data(dataset_list,
                                 start_time=config.get_data_start_time,
                                 last_time=config.get_data_last_time,
                                 method=config.get_file_way,
                                 number_list=index_list,
                                 result_dir=config.result_dir)
                    # Each iteration stacks the 100 entries i-99 .. i of
                    # `x_test_list` and reshapes them to 49 columns.
                    # NOTE(review): 100 and 49 look like the window length
                    # and the data dimension — confirm against `config`.
                    for i in range(99, len(test_timestamp_list)):
                        test_data = np.hstack(x_test_list[i - 99:i +
                                                          1]).reshape(-1, 49)
                        test_score, test_z, pred_speed = predictor.get_score(
                            test_data, mode='cluster_test')
                        # all-zero labels, one per score row
                        test_label = np.zeros([test_score.shape[0]])
                        if config.test_score_filename is not None:
                            # one output file per timestamp
                            with open(
                                    os.path.join(
                                        config.result_dir,
                                        f'{test_timestamp_list[i]}-{config.test_score_filename}'
                                    ), 'wb') as file:
                                pickle.dump(
                                    [dataset_list, test_score, test_label],
                                    file)
def main():
    """Train OmniAnomaly on several datasets and evaluate each of them.

    Parses the comma-separated options in `config`, loads all datasets,
    builds model, trainer and predictor under a variable scope named after
    `config.save_dir`, optionally restores variables from
    `config.restore_dir`, trains when `config.max_epoch > 0`, then, if
    `config.get_score_for_each_machine_flag` is set, scores and evaluates
    every dataset separately and writes the per-dataset results into
    `config.result_dir`.  Finally the variables are optionally saved to
    `config.save_dir` and the training metrics are printed.
    """
    if config.GPU_device_number != "-1":
        os.environ["CUDA_VISIBLE_DEVICES"] = config.GPU_device_number
    logging.basicConfig(
        format='%(asctime)s [%(levelname)s] %(name)s: %(message)s',
        level='INFO')

    def in_result_dir(filename):
        # path of `filename` inside `config.result_dir`
        return os.path.join(config.result_dir, filename)

    def pickle_to(path, obj):
        # pickle `obj` to `path`
        with open(path, 'wb') as out:
            pickle.dump(obj, out)

    save_z_flag = int(config.save_z)
    get_score_flag = int(config.get_score_for_each_machine_flag)

    # comma-separated options -> lists (None stays None)
    if config.untrainable_variables_keyvalues is not None:
        config.untrainable_variables_keyvalues = \
            config.untrainable_variables_keyvalues.replace(" ", '').split(',')
    else:
        config.untrainable_variables_keyvalues = None
    dataset_list = config.dataset.replace(" ", '').split(',')

    # by default every dataset gets an equal share
    if config.sample_ratio is None:
        config.sample_ratio = 1.0 / len(dataset_list)
    else:
        config.sample_ratio = config.sample_ratio
    config.x_dim = get_data_dim(dataset_list)

    # load the data
    (x_train_list, _), (x_test_list, y_test_list) = get_data(
        dataset_list,
        config.max_train_size,
        config.max_test_size,
        train_start=config.train_start,
        test_start=config.test_start)

    # everything below is built under a scope named after `save_dir`
    with tf.variable_scope(config.save_dir) as model_vs:
        model = OmniAnomaly(config=config, name=config.save_dir)

        trainer = Trainer(
            model=model,
            model_vs=model_vs,
            max_epoch=config.max_epoch,
            batch_size=config.batch_size,
            valid_batch_size=config.test_batch_size,
            initial_lr=config.initial_lr,
            lr_anneal_epochs=config.lr_anneal_epoch_freq,
            lr_anneal_factor=config.lr_anneal_factor,
            grad_clip_norm=config.gradient_clip_norm,
            valid_step_freq=config.valid_step_freq,
            untrainable_variables_keyvalues=(
                config.untrainable_variables_keyvalues))

        predictor = Predictor(
            model,
            batch_size=config.batch_size,
            n_z=config.test_n_z,
            last_point_only=True)

        with tf.Session().as_default():
            if config.restore_dir is not None:
                # load previously saved variables from `restore_dir`
                restorer = VariableSaver(get_variables_as_dict(model_vs),
                                         config.restore_dir)
                restorer.restore()

            if not config.max_epoch > 0:
                best_valid_metrics = {}
            else:
                # train, and record the average wall time per epoch
                started = time.time()
                best_valid_metrics = trainer.fit(
                    x_train_list, sample_ratio=config.sample_ratio)
                per_epoch = (time.time() - started) / config.max_epoch
                best_valid_metrics.update({'train_time': per_epoch})

            # per-dataset scoring and evaluation
            if get_score_flag:
                per_dataset = zip(dataset_list, x_train_list, x_test_list,
                                  y_test_list)
                for ds, x_train, x_test, y_test in per_dataset:
                    # train-set scores are needed by the POT evaluation
                    train_score, train_z, train_pred_speed = \
                        predictor.get_score(x_train)
                    if config.train_score_filename is not None:
                        pickle_to(
                            in_result_dir(
                                f'{ds}-{config.train_score_filename}'),
                            train_score)
                    if save_z_flag:
                        save_z(train_z, in_result_dir(f'{ds}-train_z'))

                    # score the test set and time it
                    started = time.time()
                    test_score, test_z, pred_speed = predictor.get_score(
                        x_test)
                    test_time = time.time() - started
                    if config.test_score_filename is not None:
                        pickle_to(
                            in_result_dir(
                                f'{ds}-{config.test_score_filename}'),
                            test_score)
                    if save_z_flag:
                        save_z(test_z, in_result_dir(f'{ds}-test_z'))

                    # evaluation needs a label for every score
                    if y_test is None or len(y_test) < len(test_score):
                        continue

                    if config.get_score_on_dim:
                        # collapse per-dimension scores into a joint score
                        test_score = np.sum(test_score, axis=-1)
                        train_score = np.sum(train_score, axis=-1)

                    # brute-force search for the best-F1 threshold
                    search_span = abs(config.bf_search_max -
                                      config.bf_search_min)
                    t, th = bf_search(
                        test_score,
                        y_test[-len(test_score):],
                        start=config.bf_search_min,
                        end=config.bf_search_max,
                        step_num=int(search_span /
                                     config.bf_search_step_size),
                        display_freq=50)

                    # POT-based evaluation
                    pot_result = pot_eval(
                        train_score,
                        test_score,
                        y_test[-len(test_score):],
                        level=config.level)

                    result_dict = {
                        'pred_time': pred_speed,
                        'pred_total_time': test_time,
                        'best-f1': t[0],
                        'precision': t[1],
                        'recall': t[2],
                        'TP': t[3],
                        'TN': t[4],
                        'FP': t[5],
                        'FN': t[6],
                        'latency': t[-1],
                        'threshold': th
                    }
                    for pot_key, pot_value in pot_result.items():
                        result_dict[pot_key] = pot_value

                    # NOTE(review): the file is named `.json` but holds
                    # pickle data — confirm what the readers expect.
                    pickle_to(in_result_dir(f'{ds}-result.json'),
                              result_dict)

            if config.save_dir is not None:
                # persist the variables to `save_dir`
                saver = VariableSaver(get_variables_as_dict(model_vs),
                                      config.save_dir)
                saver.save()
            print('=' * 30 + 'result' + '=' * 30)
            pprint(best_valid_metrics)