def test_init_invalid_mount_path(self):
    name = 'yoname'
    type = 'emptyDir'
    mount_path = object()
    config = utils.create_config()
    with self.assertRaises(SyntaxError):
        K8sVolume(config=config, name=name, type=type, mount_path=mount_path)

def test_emptydir_set_medium_emptystring(self):
    name = "yoname"
    type = "emptyDir"
    mount_path = "/path/on/container"
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    vol.set_medium('')
    self.assertEqual('', vol.model.medium)

def test_nfs_set_server_none(self):
    name = "yoname"
    type = "nfs"
    mount_path = "/path/on/container"
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    with self.assertRaises(SyntaxError):
        vol.set_server()

def test_git_repo_set_revision_none(self):
    name = "yoname"
    type = "gitRepo"
    mount_path = "/path/on/container"
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    with self.assertRaises(SyntaxError):
        vol.set_git_revision()

def test_gce_set_fs_type_none(self):
    name = "yoname"
    type = "gcePersistentDisk"
    mount_path = "/path/on/container"
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    with self.assertRaises(SyntaxError):
        vol.set_fs_type()
def __init__(self):
    self.storage = p4.Storage()
    try:
        self.config = utils.create_config("config.json")
        self.discord = discord.Discord(self.config)
        self.perforce = p4.init(self.config)
    except AssertionError as error:
        # Surface setup failures to the caller so initialization aborts.
        assert False, error
def test_emptydir_set_medium_invalid_type(self):
    name = "yoname"
    type = "hostPath"
    mount_path = "/path/on/container"
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    with self.assertRaises(SyntaxError):
        vol.set_medium()

def test_aws_set_fs_type_none(self):
    name = "yoname"
    type = "awsElasticBlockStore"
    mount_path = "/path/on/container"
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    with self.assertRaises(SyntaxError):
        vol.set_fs_type()

def test_gce_set_pd_name_invalid_obj(self):
    name = "yoname"
    type = "gcePersistentDisk"
    mount_path = "/path/on/container"
    pd_name = object()
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    with self.assertRaises(SyntaxError):
        vol.set_volume_id(pd_name)

def test_aws_set_fs_type(self):
    name = "yoname"
    type = "awsElasticBlockStore"
    mount_path = "/path/on/container"
    fs_type = "xfs"
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    vol.set_fs_type(fs_type)
    self.assertEqual(vol.model.fs_type, fs_type)

def test_fs_type_invalid_type(self):
    name = "yoname"
    type = "emptyDir"
    mount_path = "/path/on/container"
    fs_type = object()
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    with self.assertRaises(SyntaxError):
        vol.set_fs_type(fs_type)
def test_git_repo_set_repository_invalid_type(self):
    name = "yoname"
    type = "emptyDir"
    mount_path = "/path/on/container"
    repo = "git@somewhere:me/my-git-repository.git"
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    with self.assertRaises(SyntaxError):
        vol.set_git_repository(repo)
def test_hostpath_set_path(self):
    name = "yoname"
    type = "hostPath"
    host_path = "/path/on/host"
    mount_path = "/path/on/container"
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    vol.set_path(host_path)
    self.assertEqual(host_path, vol.model.path)

def test_nfs_set_server_invalid_type(self):
    name = "yoname"
    type = "emptyDir"
    mount_path = "/path/on/container"
    server = "nfs.company.com"
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    with self.assertRaises(SyntaxError):
        vol.set_server(server)

def test_aws_set_volume_id_invalid_type(self):
    name = "yoname"
    type = "emptyDir"
    mount_path = "/path/on/container"
    volume_id = "vol-0a89c9040d544a371"
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    with self.assertRaises(SyntaxError):
        vol.set_volume_id(volume_id)

def test_aws_set_volume_id_invalid_obj(self):
    name = "yoname"
    type = "awsElasticBlockStore"
    mount_path = "/path/on/container"
    volume_id = object()
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    with self.assertRaises(SyntaxError):
        vol.set_volume_id(volume_id)

def test_aws_init(self):
    name = "yoname"
    type = "awsElasticBlockStore"
    mount_path = "/path/on/container"
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    self.assertIsNotNone(vol)
    self.assertIsInstance(vol, K8sVolume)
    self.assertEqual(type, vol.type)

def test_secret_set_name_invalid_obj(self):
    name = "yoname"
    type = "secret"
    mount_path = "/path/on/container"
    secret = object()
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    with self.assertRaises(SyntaxError):
        vol.set_secret_name(secret)

def test_nfs_set_server(self):
    name = "yoname"
    type = "nfs"
    mount_path = "/path/on/container"
    server = "nfs.company.com"
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    vol.set_server(server)
    self.assertEqual(vol.model.server, server)

def test_gce_set_pd_name_invalid_type(self):
    name = "yoname"
    type = "emptyDir"
    mount_path = "/path/on/container"
    pd_name = "yopdname"
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    with self.assertRaises(SyntaxError):
        vol.set_volume_id(pd_name)
def test_aws_set_volume_id(self):
    name = "yoname"
    type = "awsElasticBlockStore"
    mount_path = "/path/on/container"
    volume_id = "vol-0a89c9040d544a371"
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    vol.set_volume_id(volume_id)
    self.assertEqual(vol.model.aws_volume_id, volume_id)
def test_gce_set_fs_type(self):
    name = "yoname"
    type = "gcePersistentDisk"
    mount_path = "/path/on/container"
    fs_type = "xfs"
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    vol.set_fs_type(fs_type)
    self.assertEqual(vol.model.fs_type, fs_type)

def test_delete(self):
    name = "yoname"
    rs = utils.create_rs(name=name)
    config = utils.create_config()
    if utils.is_reachable(config.api_host):
        utils.cleanup_rs()
        result = rs.list()
        self.assertIsInstance(result, list)
        self.assertEqual(0, len(result))

def test_git_repo_set_repo_invalid(self):
    name = "yoname"
    type = "gitRepo"
    mount_path = "/path/on/container"
    repo = object()
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    with self.assertRaises(SyntaxError):
        vol.set_git_repository(repo=repo)

def test_emptydir_init(self):
    name = "yoname"
    type = "emptyDir"
    mount_path = "/path/on/container"
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    self.assertIsNotNone(vol)
    self.assertIsInstance(vol, K8sVolume)
    self.assertEqual(type, vol.type)

def test_git_repo_set_revision(self):
    name = "yoname"
    type = "gitRepo"
    mount_path = "/path/on/container"
    rev = "22f1d8406d464b0c0874075539c1f2e96c253775"
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    vol.set_git_revision(rev)
    self.assertEqual(vol.model.git_revision, rev)
def test_init_windows_mount_path(self):
    name = 'yoname'
    type = 'emptyDir'
    # Use a raw string so the backslashes in the Windows path are not treated
    # as escape sequences.
    mount_path = r"C:\Program Files\Your Mom"
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    self.assertIsNotNone(vol)
    self.assertIsInstance(vol, K8sVolume)
    self.assertEqual('emptyDir', vol.type)
def test_git_repo_set_repository(self):
    name = "yoname"
    type = "gitRepo"
    mount_path = "/path/on/container"
    repo = "git@somewhere:me/my-git-repository.git"
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    vol.set_git_repository(repo)
    self.assertEqual(vol.model.git_repo, repo)
def test_git_repo_set_revision_invalid_type(self):
    name = "yoname"
    type = "emptyDir"
    mount_path = "/path/on/container"
    rev = "22f1d8406d464b0c0874075539c1f2e96c253775"
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    with self.assertRaises(SyntaxError):
        vol.set_git_revision(rev)
def test_secret_set_name(self):
    name = "yoname"
    type = "secret"
    mount_path = "/path/on/container"
    secret_name = "yosecret"
    secret = utils.create_secret(name=secret_name)
    config = utils.create_config()
    vol = K8sVolume(config=config, name=name, type=type, mount_path=mount_path)
    vol.set_secret_name(secret)
    self.assertEqual(vol.model.secret_name, secret_name)

def __init__(self):
    self.storage = p4.Storage()
    self.reviews = {}
    self.exit_flag = True
    try:
        self.config = utils.create_config("config.json")
        self.discord = discord.Discord(self.config, "review_webhook")
        self.perforce = p4.init(self.config)
    except AssertionError as error:
        mutex.acquire()
        self.exit_flag = False
        mutex.release()
        assert False, error
def main():
    """ main method """
    if ARGS.phase == 'train':
        config = utils.create_config(ARGS)
        dset = TwentyNewsDataset('train')
        train_loader = DataLoader(dset, shuffle=False, batch_size=ARGS.bs)
        stats = dset.get_data_mtx().T
        print("No. of batches:", len(train_loader))
        # if len(train_loader) > 1:
        train_batch_wise(stats, config, train_loader)
        # else:
        #     train(stats, config, train_loader)
    elif ARGS.phase == 'extract':
        if ARGS.m:
            model_f = os.path.realpath(ARGS.m)
            model, config = utils.load_model_and_config(model_f)
        else:
            print("Specify the path to trained model with option -m")
            sys.exit()
        for set_name in ['train', 'test']:
            dset = TwentyNewsDataset(set_name)
            data_loader = DataLoader(dset, shuffle=False, batch_size=ARGS.bs)
            stats = dset.get_data_mtx().T
            print("No. of batches:", len(data_loader))
            for bix, (data, _) in enumerate(data_loader):
                extract_ivectors(data, model, config, set_name + '_b' + str(bix))
            mbase = os.path.splitext(os.path.basename(ARGS.m))[0]
            utils.merge_ivecs(config['ivecs_dir'], set_name, mbase, ARGS.xtr,
                              len(data_loader))
        utils.save_ivecs_to_h5(config['ivecs_dir'], mbase, ARGS.xtr)
    else:
        print("Invalid option. Should be train or extract.")
        sys.exit()
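# Example invocation of the script above. This is only a sketch: the script
# filename and the exact argparse flag spellings for `phase`, `bs`, `m`, and
# `xtr` are assumptions inferred from the ARGS attributes used in main().
#
#   python ivector_main.py --phase train --bs 256
#   python ivector_main.py --phase extract -m exp/model.pt --bs 256 --xtr 10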
def test_add_volume_emptydir(self):
    name = "redis"
    image = "redis:3.0.7"
    c = K8sContainer(name=name, image=image)
    volname = "vol1"
    voltype = "emptyDir"
    volmount = "/path/on/container"
    config = utils.create_config()
    vol = K8sVolume(config=config, name=volname, type=voltype, mount_path=volmount)
    c.add_volume_mount(vol)
    self.assertIn("volumeMounts", c.model.model)
    self.assertIsInstance(c.model.model["volumeMounts"], list)
    self.assertEqual(1, len(c.model.model["volumeMounts"]))
    self.assertIsInstance(c.model.model["volumeMounts"][0], dict)
    for i in ["mountPath", "name"]:
        self.assertIn(i, c.model.model["volumeMounts"][0])
    self.assertEqual(volname, c.model.model["volumeMounts"][0]["name"])
    self.assertEqual(volmount, c.model.model["volumeMounts"][0]["mountPath"])
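# The volume tests above repeat the same construction boilerplate. A shared
# helper along the lines below could factor it out. This is only a sketch:
# `_make_volume` is a hypothetical name and is not part of the original suite.
def _make_volume(self, type="emptyDir", name="yoname",
                 mount_path="/path/on/container"):
    config = utils.create_config()
    return K8sVolume(config=config, name=name, type=type, mount_path=mount_path)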
def Create(self, request, context):
    # validate request payload
    data = project_serializer.load(request)
    # generate a unique bucket name and create the bucket in MinIO
    bucket_name = uuid.uuid4().hex
    services.create_minio_bucket(bucket_name)
    # build the project configuration
    config = utils.create_config(data['name'], data['description'])
    # render a cookiecutter template into a working directory
    workdir = os.path.join(os.path.dirname(__file__), 'data', bucket_name)
    templatedir = os.path.join(os.path.dirname(__file__), 'templates',
                               data['template'].lower())
    os.mkdir(workdir)
    cookiecutter(templatedir, output_dir=workdir, no_input=True,
                 extra_context=config)
    # copy the rendered files to the MinIO bucket
    file_tree = services.sync_dir_to_minio_bucket(
        bucket_name, os.path.join(workdir, config.get('repo_name')))
    # save to db
    members = data["members"]
    members.append(data["owner"])
    data['members'] = list(set(members))
    project = documents.Project(name=data['name'],
                                description=data['description'],
                                visibility=data['visibility'],
                                template=data['template'],
                                owner=data['owner'],
                                members=data['members'],
                                state=data["state"],
                                repo_bucket=bucket_name).save()
    documents.Revision(commit="Generated by Ilyde",
                       project=project.id,
                       author=data['owner'],
                       file_tree=file_tree).save()
    # clean up the working directory
    shutil.rmtree(workdir)
    return project_serializer.dump(project)
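# A minimal sketch of the mapping that utils.create_config(name, description)
# presumably returns in Create() above. cookiecutter receives it as
# extra_context, and the handler later reads config.get('repo_name'), so a
# 'repo_name' key is implied; the remaining keys are illustrative assumptions.
example_config = {
    "project_name": "Example Project",      # assumed key
    "description": "Example description",   # assumed key
    "repo_name": "example-project",         # implied by config.get('repo_name')
}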
def main(argv): del argv # unused arg tf.io.gfile.makedirs(FLAGS.output_dir) logging.info('Saving checkpoints at %s', FLAGS.output_dir) tf.random.set_seed(FLAGS.seed) if FLAGS.use_gpu: logging.info('Use GPU') strategy = tf.distribute.MirroredStrategy() else: logging.info('Use TPU at %s', FLAGS.tpu if FLAGS.tpu is not None else 'local') resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=FLAGS.tpu) tf.config.experimental_connect_to_cluster(resolver) tf.tpu.experimental.initialize_tpu_system(resolver) strategy = tf.distribute.TPUStrategy(resolver) batch_size = FLAGS.per_core_batch_size * FLAGS.num_cores test_batch_size = batch_size data_buffer_size = batch_size * 10 train_dataset_builder = ds.WikipediaToxicityDataset( split='train', data_dir=FLAGS.in_dataset_dir, shuffle_buffer_size=data_buffer_size) ind_dataset_builder = ds.WikipediaToxicityDataset( split='test', data_dir=FLAGS.in_dataset_dir, shuffle_buffer_size=data_buffer_size) ood_dataset_builder = ds.CivilCommentsDataset( split='test', data_dir=FLAGS.ood_dataset_dir, shuffle_buffer_size=data_buffer_size) ood_identity_dataset_builder = ds.CivilCommentsIdentitiesDataset( split='test', data_dir=FLAGS.identity_dataset_dir, shuffle_buffer_size=data_buffer_size) train_dataset_builders = { 'wikipedia_toxicity_subtypes': train_dataset_builder } test_dataset_builders = { 'ind': ind_dataset_builder, 'ood': ood_dataset_builder, 'ood_identity': ood_identity_dataset_builder, } if FLAGS.prediction_mode and FLAGS.identity_prediction: for dataset_name in utils.IDENTITY_LABELS: if utils.NUM_EXAMPLES[dataset_name]['test'] > 100: test_dataset_builders[dataset_name] = ds.CivilCommentsIdentitiesDataset( split='test', data_dir=os.path.join( FLAGS.identity_specific_dataset_dir, dataset_name), shuffle_buffer_size=data_buffer_size) for dataset_name in utils.IDENTITY_TYPES: if utils.NUM_EXAMPLES[dataset_name]['test'] > 100: test_dataset_builders[dataset_name] = ds.CivilCommentsIdentitiesDataset( split='test', data_dir=os.path.join( FLAGS.identity_type_dataset_dir, dataset_name), shuffle_buffer_size=data_buffer_size) class_weight = utils.create_class_weight( train_dataset_builders, test_dataset_builders) logging.info('class_weight: %s', str(class_weight)) ds_info = train_dataset_builder.tfds_info # Positive and negative classes. num_classes = ds_info.metadata['num_classes'] train_datasets = {} dataset_steps_per_epoch = {} total_steps_per_epoch = 0 # TODO(jereliu): Apply strategy.experimental_distribute_dataset to the # dataset_builders. 
for dataset_name, dataset_builder in train_dataset_builders.items(): train_datasets[dataset_name] = dataset_builder.load( batch_size=FLAGS.per_core_batch_size) dataset_steps_per_epoch[dataset_name] = ( dataset_builder.num_examples // batch_size) total_steps_per_epoch += dataset_steps_per_epoch[dataset_name] test_datasets = {} steps_per_eval = {} for dataset_name, dataset_builder in test_dataset_builders.items(): test_datasets[dataset_name] = dataset_builder.load( batch_size=test_batch_size) if dataset_name in ['ind', 'ood', 'ood_identity']: steps_per_eval[dataset_name] = ( dataset_builder.num_examples // test_batch_size) else: steps_per_eval[dataset_name] = ( utils.NUM_EXAMPLES[dataset_name]['test'] // test_batch_size) if FLAGS.use_bfloat16: policy = tf.keras.mixed_precision.experimental.Policy('mixed_bfloat16') tf.keras.mixed_precision.experimental.set_policy(policy) summary_writer = tf.summary.create_file_writer( os.path.join(FLAGS.output_dir, 'summaries')) with strategy.scope(): logging.info('Building BERT %s model', FLAGS.bert_model_type) logging.info('use_gp_layer=%s', FLAGS.use_gp_layer) logging.info('use_spec_norm_att=%s', FLAGS.use_spec_norm_att) logging.info('use_spec_norm_ffn=%s', FLAGS.use_spec_norm_ffn) logging.info('use_layer_norm_att=%s', FLAGS.use_layer_norm_att) logging.info('use_layer_norm_ffn=%s', FLAGS.use_layer_norm_ffn) bert_config_dir, bert_ckpt_dir = utils.resolve_bert_ckpt_and_config_dir( FLAGS.bert_model_type, FLAGS.bert_dir, FLAGS.bert_config_dir, FLAGS.bert_ckpt_dir) bert_config = utils.create_config(bert_config_dir) gp_layer_kwargs = dict( num_inducing=FLAGS.gp_hidden_dim, gp_kernel_scale=FLAGS.gp_scale, gp_output_bias=FLAGS.gp_bias, normalize_input=FLAGS.gp_input_normalization, gp_cov_momentum=FLAGS.gp_cov_discount_factor, gp_cov_ridge_penalty=FLAGS.gp_cov_ridge_penalty) spec_norm_kwargs = dict( iteration=FLAGS.spec_norm_iteration, norm_multiplier=FLAGS.spec_norm_bound) model, bert_encoder = ub.models.SngpBertBuilder( num_classes=num_classes, bert_config=bert_config, gp_layer_kwargs=gp_layer_kwargs, spec_norm_kwargs=spec_norm_kwargs, use_gp_layer=FLAGS.use_gp_layer, use_spec_norm_att=FLAGS.use_spec_norm_att, use_spec_norm_ffn=FLAGS.use_spec_norm_ffn, use_layer_norm_att=FLAGS.use_layer_norm_att, use_layer_norm_ffn=FLAGS.use_layer_norm_ffn, use_spec_norm_plr=FLAGS.use_spec_norm_plr) # Create an AdamW optimizer with beta_2=0.999, epsilon=1e-6. 
optimizer = utils.create_optimizer( FLAGS.base_learning_rate, steps_per_epoch=total_steps_per_epoch, epochs=FLAGS.train_epochs, warmup_proportion=FLAGS.warmup_proportion, beta_1=1.0 - FLAGS.one_minus_momentum) logging.info('Model input shape: %s', model.input_shape) logging.info('Model output shape: %s', model.output_shape) logging.info('Model number of weights: %s', model.count_params()) metrics = { 'train/negative_log_likelihood': tf.keras.metrics.Mean(), 'train/accuracy': tf.keras.metrics.Accuracy(), 'train/accuracy_weighted': tf.keras.metrics.Accuracy(), 'train/auroc': tf.keras.metrics.AUC(), 'train/loss': tf.keras.metrics.Mean(), 'train/ece': rm.metrics.ExpectedCalibrationError( num_bins=FLAGS.num_bins), 'train/precision': tf.keras.metrics.Precision(), 'train/recall': tf.keras.metrics.Recall(), 'train/f1': tfa_metrics.F1Score( num_classes=num_classes, average='micro', threshold=FLAGS.ece_label_threshold), } checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer) if FLAGS.prediction_mode: latest_checkpoint = tf.train.latest_checkpoint(FLAGS.eval_checkpoint_dir) else: latest_checkpoint = tf.train.latest_checkpoint(FLAGS.output_dir) initial_epoch = 0 if latest_checkpoint: # checkpoint.restore must be within a strategy.scope() so that optimizer # slot variables are mirrored. checkpoint.restore(latest_checkpoint) logging.info('Loaded checkpoint %s', latest_checkpoint) initial_epoch = optimizer.iterations.numpy() // total_steps_per_epoch else: # load BERT from initial checkpoint bert_encoder, _, _ = utils.load_bert_weight_from_ckpt( bert_model=bert_encoder, bert_ckpt_dir=bert_ckpt_dir, repl_patterns=ub.models.bert_sngp.CHECKPOINT_REPL_PATTERNS) logging.info('Loaded BERT checkpoint %s', bert_ckpt_dir) metrics.update({ 'test/negative_log_likelihood': tf.keras.metrics.Mean(), 'test/auroc': tf.keras.metrics.AUC(curve='ROC'), 'test/aupr': tf.keras.metrics.AUC(curve='PR'), 'test/brier': tf.keras.metrics.MeanSquaredError(), 'test/brier_weighted': tf.keras.metrics.MeanSquaredError(), 'test/ece': rm.metrics.ExpectedCalibrationError(num_bins=FLAGS.num_bins), 'test/acc': tf.keras.metrics.Accuracy(), 'test/acc_weighted': tf.keras.metrics.Accuracy(), 'test/eval_time': tf.keras.metrics.Mean(), 'test/stddev': tf.keras.metrics.Mean(), 'test/precision': tf.keras.metrics.Precision(), 'test/recall': tf.keras.metrics.Recall(), 'test/f1': tfa_metrics.F1Score( num_classes=num_classes, average='micro', threshold=FLAGS.ece_label_threshold), 'test/calibration_auroc': tc_metrics.CalibrationAUC(curve='ROC'), 'test/calibration_auprc': tc_metrics.CalibrationAUC(curve='PR') }) for fraction in FLAGS.fractions: metrics.update({ 'test_collab_acc/collab_acc_{}'.format(fraction): rm.metrics.OracleCollaborativeAccuracy( fraction=float(fraction), num_bins=FLAGS.num_bins) }) metrics.update({ 'test_abstain_prec/abstain_prec_{}'.format(fraction): tc_metrics.AbstainPrecision(abstain_fraction=float(fraction)) }) metrics.update({ 'test_abstain_recall/abstain_recall_{}'.format(fraction): tc_metrics.AbstainRecall(abstain_fraction=float(fraction)) }) for dataset_name, test_dataset in test_datasets.items(): if dataset_name != 'ind': metrics.update({ 'test/nll_{}'.format(dataset_name): tf.keras.metrics.Mean(), 'test/auroc_{}'.format(dataset_name): tf.keras.metrics.AUC(curve='ROC'), 'test/aupr_{}'.format(dataset_name): tf.keras.metrics.AUC(curve='PR'), 'test/brier_{}'.format(dataset_name): tf.keras.metrics.MeanSquaredError(), 'test/brier_weighted_{}'.format(dataset_name): tf.keras.metrics.MeanSquaredError(), 
'test/ece_{}'.format(dataset_name): rm.metrics.ExpectedCalibrationError(num_bins=FLAGS.num_bins), 'test/acc_{}'.format(dataset_name): tf.keras.metrics.Accuracy(), 'test/acc_weighted_{}'.format(dataset_name): tf.keras.metrics.Accuracy(), 'test/eval_time_{}'.format(dataset_name): tf.keras.metrics.Mean(), 'test/stddev_{}'.format(dataset_name): tf.keras.metrics.Mean(), 'test/precision_{}'.format(dataset_name): tf.keras.metrics.Precision(), 'test/recall_{}'.format(dataset_name): tf.keras.metrics.Recall(), 'test/f1_{}'.format(dataset_name): tfa_metrics.F1Score( num_classes=num_classes, average='micro', threshold=FLAGS.ece_label_threshold), 'test/calibration_auroc_{}'.format(dataset_name): tc_metrics.CalibrationAUC(curve='ROC'), 'test/calibration_auprc_{}'.format(dataset_name): tc_metrics.CalibrationAUC(curve='PR'), }) for fraction in FLAGS.fractions: metrics.update({ 'test_collab_acc/collab_acc_{}_{}'.format(fraction, dataset_name): rm.metrics.OracleCollaborativeAccuracy( fraction=float(fraction), num_bins=FLAGS.num_bins) }) metrics.update({ 'test_abstain_prec/abstain_prec_{}_{}'.format( fraction, dataset_name): tc_metrics.AbstainPrecision(abstain_fraction=float(fraction)) }) metrics.update({ 'test_abstain_recall/abstain_recall_{}_{}'.format( fraction, dataset_name): tc_metrics.AbstainRecall(abstain_fraction=float(fraction)) }) @tf.function def generate_sample_weight(labels, class_weight, label_threshold=0.7): """Generate sample weight for weighted accuracy calculation.""" if label_threshold != 0.7: logging.warning('The class weight was based on `label_threshold` = 0.7, ' 'and weighted accuracy/brier will be meaningless if ' '`label_threshold` is not equal to this value, which is ' 'recommended by Jigsaw Conversation AI team.') labels_int = tf.cast(labels > label_threshold, tf.int32) sample_weight = tf.gather(class_weight, labels_int) return sample_weight @tf.function def train_step(iterator, dataset_name, num_steps): """Training StepFn.""" def step_fn(inputs): """Per-Replica StepFn.""" features, labels, _ = utils.create_feature_and_label(inputs) with tf.GradientTape() as tape: logits = model(features, training=True) if isinstance(logits, (list, tuple)): # If model returns a tuple of (logits, covmat), extract logits logits, _ = logits if FLAGS.use_bfloat16: logits = tf.cast(logits, tf.float32) loss_logits = tf.squeeze(logits, axis=1) if FLAGS.loss_type == 'cross_entropy': logging.info('Using cross entropy loss') negative_log_likelihood = tf.nn.sigmoid_cross_entropy_with_logits( labels, loss_logits) elif FLAGS.loss_type == 'focal_cross_entropy': logging.info('Using focal cross entropy loss') negative_log_likelihood = tfa_losses.sigmoid_focal_crossentropy( labels, loss_logits, alpha=FLAGS.focal_loss_alpha, gamma=FLAGS.focal_loss_gamma, from_logits=True) elif FLAGS.loss_type == 'mse': logging.info('Using mean squared error loss') loss_probs = tf.nn.sigmoid(loss_logits) negative_log_likelihood = tf.keras.losses.mean_squared_error( labels, loss_probs) elif FLAGS.loss_type == 'mae': logging.info('Using mean absolute error loss') loss_probs = tf.nn.sigmoid(loss_logits) negative_log_likelihood = tf.keras.losses.mean_absolute_error( labels, loss_probs) negative_log_likelihood = tf.reduce_mean(negative_log_likelihood) l2_loss = sum(model.losses) loss = negative_log_likelihood + l2_loss # Scale the loss given the TPUStrategy will reduce sum all gradients. 
scaled_loss = loss / strategy.num_replicas_in_sync grads = tape.gradient(scaled_loss, model.trainable_variables) optimizer.apply_gradients(zip(grads, model.trainable_variables)) probs = tf.nn.sigmoid(logits) # Cast labels to discrete for ECE computation. ece_labels = tf.cast(labels > FLAGS.ece_label_threshold, tf.float32) one_hot_labels = tf.one_hot(tf.cast(ece_labels, tf.int32), depth=num_classes) ece_probs = tf.concat([1. - probs, probs], axis=1) auc_probs = tf.squeeze(probs, axis=1) pred_labels = tf.math.argmax(ece_probs, axis=-1) sample_weight = generate_sample_weight( labels, class_weight['train/{}'.format(dataset_name)], FLAGS.ece_label_threshold) metrics['train/negative_log_likelihood'].update_state( negative_log_likelihood) metrics['train/accuracy'].update_state(labels, pred_labels) metrics['train/accuracy_weighted'].update_state( ece_labels, pred_labels, sample_weight=sample_weight) metrics['train/auroc'].update_state(labels, auc_probs) metrics['train/loss'].update_state(loss) metrics['train/ece'].add_batch(ece_probs, label=ece_labels) metrics['train/precision'].update_state(ece_labels, pred_labels) metrics['train/recall'].update_state(ece_labels, pred_labels) metrics['train/f1'].update_state(one_hot_labels, ece_probs) for _ in tf.range(tf.cast(num_steps, tf.int32)): strategy.run(step_fn, args=(next(iterator),)) @tf.function def test_step(iterator, dataset_name): """Evaluation StepFn.""" def step_fn(inputs): """Per-Replica StepFn.""" features, labels, _ = utils.create_feature_and_label(inputs) eval_start_time = time.time() # Compute ensemble prediction over Monte Carlo forward-pass samples. logits_list = [] stddev_list = [] for _ in range(FLAGS.num_mc_samples): logits = model(features, training=False) if isinstance(logits, (list, tuple)): # If model returns a tuple of (logits, covmat), extract both. logits, covmat = logits else: covmat = tf.eye(test_batch_size) if FLAGS.use_bfloat16: logits = tf.cast(logits, tf.float32) covmat = tf.cast(covmat, tf.float32) logits = ed.layers.utils.mean_field_logits( logits, covmat, mean_field_factor=FLAGS.gp_mean_field_factor) stddev = tf.sqrt(tf.linalg.diag_part(covmat)) logits_list.append(logits) stddev_list.append(stddev) eval_time = (time.time() - eval_start_time) / FLAGS.per_core_batch_size # Logits dimension is (num_samples, batch_size, num_classes). logits_list = tf.stack(logits_list, axis=0) stddev_list = tf.stack(stddev_list, axis=0) stddev = tf.reduce_mean(stddev_list, axis=0) probs_list = tf.nn.sigmoid(logits_list) probs = tf.reduce_mean(probs_list, axis=0) # Cast labels to discrete for ECE computation. ece_labels = tf.cast(labels > FLAGS.ece_label_threshold, tf.float32) one_hot_labels = tf.one_hot(tf.cast(ece_labels, tf.int32), depth=num_classes) ece_probs = tf.concat([1. - probs, probs], axis=1) pred_labels = tf.math.argmax(ece_probs, axis=-1) auc_probs = tf.squeeze(probs, axis=1) # Use normalized binary predictive variance as the confidence score. # Since the prediction variance p*(1-p) is within range (0, 0.25), # normalize it by maximum value so the confidence is between (0, 1). calib_confidence = 1. - probs * (1. 
- probs) / .25 ce = tf.nn.sigmoid_cross_entropy_with_logits( labels=tf.broadcast_to( labels, [FLAGS.num_mc_samples, labels.shape[0]]), logits=tf.squeeze(logits_list, axis=-1) ) negative_log_likelihood = -tf.reduce_logsumexp( -ce, axis=0) + tf.math.log(float(FLAGS.num_mc_samples)) negative_log_likelihood = tf.reduce_mean(negative_log_likelihood) sample_weight = generate_sample_weight( labels, class_weight['test/{}'.format(dataset_name)], FLAGS.ece_label_threshold) if dataset_name == 'ind': metrics['test/negative_log_likelihood'].update_state( negative_log_likelihood) metrics['test/auroc'].update_state(labels, auc_probs) metrics['test/aupr'].update_state(labels, auc_probs) metrics['test/brier'].update_state(labels, auc_probs) metrics['test/brier_weighted'].update_state( tf.expand_dims(labels, -1), probs, sample_weight=sample_weight) metrics['test/ece'].add_batch(ece_probs, label=ece_labels) metrics['test/acc'].update_state(ece_labels, pred_labels) metrics['test/acc_weighted'].update_state( ece_labels, pred_labels, sample_weight=sample_weight) metrics['test/eval_time'].update_state(eval_time) metrics['test/stddev'].update_state(stddev) metrics['test/precision'].update_state(ece_labels, pred_labels) metrics['test/recall'].update_state(ece_labels, pred_labels) metrics['test/f1'].update_state(one_hot_labels, ece_probs) metrics['test/calibration_auroc'].update_state(ece_labels, pred_labels, calib_confidence) metrics['test/calibration_auprc'].update_state(ece_labels, pred_labels, calib_confidence) for fraction in FLAGS.fractions: metrics['test_collab_acc/collab_acc_{}'.format( fraction)].add_batch(ece_probs, label=ece_labels) metrics['test_abstain_prec/abstain_prec_{}'.format( fraction)].update_state(ece_labels, pred_labels, calib_confidence) metrics['test_abstain_recall/abstain_recall_{}'.format( fraction)].update_state(ece_labels, pred_labels, calib_confidence) else: metrics['test/nll_{}'.format(dataset_name)].update_state( negative_log_likelihood) metrics['test/auroc_{}'.format(dataset_name)].update_state( labels, auc_probs) metrics['test/aupr_{}'.format(dataset_name)].update_state( labels, auc_probs) metrics['test/brier_{}'.format(dataset_name)].update_state( labels, auc_probs) metrics['test/brier_weighted_{}'.format(dataset_name)].update_state( tf.expand_dims(labels, -1), probs, sample_weight=sample_weight) metrics['test/ece_{}'.format(dataset_name)].add_batch( ece_probs, label=ece_labels) metrics['test/acc_{}'.format(dataset_name)].update_state( ece_labels, pred_labels) metrics['test/acc_weighted_{}'.format(dataset_name)].update_state( ece_labels, pred_labels, sample_weight=sample_weight) metrics['test/eval_time_{}'.format(dataset_name)].update_state( eval_time) metrics['test/stddev_{}'.format(dataset_name)].update_state(stddev) metrics['test/precision_{}'.format(dataset_name)].update_state( ece_labels, pred_labels) metrics['test/recall_{}'.format(dataset_name)].update_state( ece_labels, pred_labels) metrics['test/f1_{}'.format(dataset_name)].update_state( one_hot_labels, ece_probs) metrics['test/calibration_auroc_{}'.format(dataset_name)].update_state( ece_labels, pred_labels, calib_confidence) metrics['test/calibration_auprc_{}'.format(dataset_name)].update_state( ece_labels, pred_labels, calib_confidence) for fraction in FLAGS.fractions: metrics['test_collab_acc/collab_acc_{}_{}'.format( fraction, dataset_name)].add_batch(ece_probs, label=ece_labels) metrics['test_abstain_prec/abstain_prec_{}_{}'.format( fraction, dataset_name)].update_state(ece_labels, pred_labels, calib_confidence) 
metrics['test_abstain_recall/abstain_recall_{}_{}'.format( fraction, dataset_name)].update_state(ece_labels, pred_labels, calib_confidence) strategy.run(step_fn, args=(next(iterator),)) @tf.function def final_eval_step(iterator): """Final Evaluation StepFn to save prediction to directory.""" def step_fn(inputs): bert_features, labels, additional_labels = utils.create_feature_and_label( inputs) logits = model(bert_features, training=False) if isinstance(logits, (list, tuple)): # If model returns a tuple of (logits, covmat), extract both. logits, covmat = logits else: covmat = tf.eye(test_batch_size) if FLAGS.use_bfloat16: logits = tf.cast(logits, tf.float32) covmat = tf.cast(covmat, tf.float32) logits = ed.layers.utils.mean_field_logits( logits, covmat, mean_field_factor=FLAGS.gp_mean_field_factor) features = inputs['input_ids'] return features, logits, labels, additional_labels (per_replica_texts, per_replica_logits, per_replica_labels, per_replica_additional_labels) = ( strategy.run(step_fn, args=(next(iterator),))) if strategy.num_replicas_in_sync > 1: texts_list = tf.concat(per_replica_texts.values, axis=0) logits_list = tf.concat(per_replica_logits.values, axis=0) labels_list = tf.concat(per_replica_labels.values, axis=0) additional_labels_dict = {} for additional_label in utils.IDENTITY_LABELS: if additional_label in per_replica_additional_labels: additional_labels_dict[additional_label] = tf.concat( per_replica_additional_labels[additional_label], axis=0) else: texts_list = per_replica_texts logits_list = per_replica_logits labels_list = per_replica_labels additional_labels_dict = {} for additional_label in utils.IDENTITY_LABELS: if additional_label in per_replica_additional_labels: additional_labels_dict[ additional_label] = per_replica_additional_labels[ additional_label] return texts_list, logits_list, labels_list, additional_labels_dict if FLAGS.prediction_mode: # Prediction and exit. 
for dataset_name, test_dataset in test_datasets.items(): test_iterator = iter(test_dataset) # pytype: disable=wrong-arg-types message = 'Final eval on dataset {}'.format(dataset_name) logging.info(message) texts_all = [] logits_all = [] labels_all = [] additional_labels_all_dict = {} if 'identity' in dataset_name: for identity_label_name in utils.IDENTITY_LABELS: additional_labels_all_dict[identity_label_name] = [] try: with tf.experimental.async_scope(): for step in range(steps_per_eval[dataset_name]): if step % 20 == 0: message = 'Starting to run eval step {}/{} of dataset: {}'.format( step, steps_per_eval[dataset_name], dataset_name) logging.info(message) (text_step, logits_step, labels_step, additional_labels_dict_step) = final_eval_step(test_iterator) texts_all.append(text_step) logits_all.append(logits_step) labels_all.append(labels_step) if 'identity' in dataset_name: for identity_label_name in utils.IDENTITY_LABELS: additional_labels_all_dict[identity_label_name].append( additional_labels_dict_step[identity_label_name]) except (StopIteration, tf.errors.OutOfRangeError): tf.experimental.async_clear_error() logging.info('Done with eval on %s', dataset_name) texts_all = tf.concat(texts_all, axis=0) logits_all = tf.concat(logits_all, axis=0) labels_all = tf.concat(labels_all, axis=0) additional_labels_all = [] if additional_labels_all_dict: for identity_label_name in utils.IDENTITY_LABELS: additional_labels_all.append( tf.concat( additional_labels_all_dict[identity_label_name], axis=0)) additional_labels_all = tf.convert_to_tensor(additional_labels_all) utils.save_prediction( texts_all.numpy(), path=os.path.join(FLAGS.output_dir, 'texts_{}'.format(dataset_name))) utils.save_prediction( labels_all.numpy(), path=os.path.join(FLAGS.output_dir, 'labels_{}'.format(dataset_name))) utils.save_prediction( logits_all.numpy(), path=os.path.join(FLAGS.output_dir, 'logits_{}'.format(dataset_name))) if 'identity' in dataset_name: utils.save_prediction( additional_labels_all.numpy(), path=os.path.join(FLAGS.output_dir, 'additional_labels_{}'.format(dataset_name))) logging.info('Done with testing on %s', dataset_name) else: # Execute train / eval loop. start_time = time.time() train_iterators = {} for dataset_name, train_dataset in train_datasets.items(): train_iterators[dataset_name] = iter(train_dataset) for epoch in range(initial_epoch, FLAGS.train_epochs): logging.info('Starting to run epoch: %s', epoch) for dataset_name, train_iterator in train_iterators.items(): try: with tf.experimental.async_scope(): train_step( train_iterator, dataset_name, dataset_steps_per_epoch[dataset_name]) current_step = ( epoch * total_steps_per_epoch + dataset_steps_per_epoch[dataset_name]) max_steps = total_steps_per_epoch * FLAGS.train_epochs time_elapsed = time.time() - start_time steps_per_sec = float(current_step) / time_elapsed eta_seconds = (max_steps - current_step) / steps_per_sec message = ('{:.1%} completion: epoch {:d}/{:d}. {:.1f} steps/s. ' 'ETA: {:.0f} min. 
Time elapsed: {:.0f} min'.format( current_step / max_steps, epoch + 1, FLAGS.train_epochs, steps_per_sec, eta_seconds / 60, time_elapsed / 60)) logging.info(message) except (StopIteration, tf.errors.OutOfRangeError): tf.experimental.async_clear_error() logging.info('Done with testing on %s', dataset_name) if epoch % FLAGS.evaluation_interval == 0: for dataset_name, test_dataset in test_datasets.items(): test_iterator = iter(test_dataset) logging.info('Testing on dataset %s', dataset_name) try: with tf.experimental.async_scope(): for step in range(steps_per_eval[dataset_name]): if step % 20 == 0: logging.info('Starting to run eval step %s/%s of epoch: %s', step, steps_per_eval[dataset_name], epoch) test_step(test_iterator, dataset_name) except (StopIteration, tf.errors.OutOfRangeError): tf.experimental.async_clear_error() logging.info('Done with testing on %s', dataset_name) logging.info('Train Loss: %.4f, ECE: %.2f, Accuracy: %.2f', metrics['train/loss'].result(), metrics['train/ece'].result(), metrics['train/accuracy'].result()) total_results = { name: metric.result() for name, metric in metrics.items() } # Metrics from Robustness Metrics (like ECE) will return a dict with a # single key/value, instead of a scalar. total_results = { k: (list(v.values())[0] if isinstance(v, dict) else v) for k, v in total_results.items() } with summary_writer.as_default(): for name, result in total_results.items(): tf.summary.scalar(name, result, step=epoch + 1) for metric in metrics.values(): metric.reset_states() checkpoint_interval = min(FLAGS.checkpoint_interval, FLAGS.train_epochs) if checkpoint_interval > 0 and (epoch + 1) % checkpoint_interval == 0: checkpoint_name = checkpoint.save( os.path.join(FLAGS.output_dir, 'checkpoint')) logging.info('Saved checkpoint to %s', checkpoint_name) # Save model in SavedModel format on exit. final_save_name = os.path.join(FLAGS.output_dir, 'model') model.save(final_save_name) logging.info('Saved model to %s', final_save_name) with summary_writer.as_default(): hp.hparams({ 'base_learning_rate': FLAGS.base_learning_rate, 'one_minus_momentum': FLAGS.one_minus_momentum, 'gp_mean_field_factor': FLAGS.gp_mean_field_factor, })
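# The SNGP evaluation above rescales logits with
# ed.layers.utils.mean_field_logits(logits, covmat, mean_field_factor=...).
# A minimal NumPy sketch of the mean-field idea, assuming the standard scaling
# logits / sqrt(1 + lambda * var); see the edward2 source for the exact
# behaviour:
import numpy as np

def mean_field_logits_sketch(logits, covmat, mean_field_factor=np.pi / 8.):
    """Scale logits by the per-example GP variance so that sigmoid(logits)
    approximates the posterior-averaged probability."""
    variances = np.diag(covmat)                       # per-example variance
    scale = np.sqrt(1. + mean_field_factor * variances)
    return logits / scale[:, None]

logits = np.array([[2.0], [-1.0]])
covmat = np.diag([4.0, 0.1])
print(mean_field_logits_sketch(logits, covmat))       # adjusted logits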
def main(argv): del argv # unused arg if not FLAGS.use_gpu: raise ValueError('Only GPU is currently supported.') if FLAGS.num_cores > 1: raise ValueError('Only a single accelerator is currently supported.') tf.random.set_seed(FLAGS.seed) logging.info('Model checkpoint will be saved at %s', FLAGS.output_dir) tf.io.gfile.makedirs(FLAGS.output_dir) batch_size = FLAGS.per_core_batch_size * FLAGS.num_cores test_batch_size = batch_size data_buffer_size = batch_size * 10 ind_dataset_builder = ds.WikipediaToxicityDataset( split='test', data_dir=FLAGS.in_dataset_dir, shuffle_buffer_size=data_buffer_size) ood_dataset_builder = ds.CivilCommentsDataset( split='test', data_dir=FLAGS.ood_dataset_dir, shuffle_buffer_size=data_buffer_size) ood_identity_dataset_builder = ds.CivilCommentsIdentitiesDataset( split='test', data_dir=FLAGS.identity_dataset_dir, shuffle_buffer_size=data_buffer_size) test_dataset_builders = { 'ind': ind_dataset_builder, 'ood': ood_dataset_builder, 'ood_identity': ood_identity_dataset_builder, } class_weight = utils.create_class_weight( test_dataset_builders=test_dataset_builders) logging.info('class_weight: %s', str(class_weight)) ds_info = ind_dataset_builder.tfds_info # Positive and negative classes. num_classes = ds_info.metadata['num_classes'] test_datasets = {} steps_per_eval = {} for dataset_name, dataset_builder in test_dataset_builders.items(): test_datasets[dataset_name] = dataset_builder.load( batch_size=test_batch_size) steps_per_eval[dataset_name] = ( dataset_builder.num_examples // test_batch_size) logging.info('Building %s model', FLAGS.model_family) bert_config_dir, _ = utils.resolve_bert_ckpt_and_config_dir( FLAGS.bert_model_type, FLAGS.bert_dir, FLAGS.bert_config_dir, FLAGS.bert_ckpt_dir) bert_config = utils.create_config(bert_config_dir) gp_layer_kwargs = dict( num_inducing=FLAGS.gp_hidden_dim, gp_kernel_scale=FLAGS.gp_scale, gp_output_bias=FLAGS.gp_bias, normalize_input=FLAGS.gp_input_normalization, gp_cov_momentum=FLAGS.gp_cov_discount_factor, gp_cov_ridge_penalty=FLAGS.gp_cov_ridge_penalty) spec_norm_kwargs = dict( iteration=FLAGS.spec_norm_iteration, norm_multiplier=FLAGS.spec_norm_bound) model, _ = ub.models.SngpBertBuilder( num_classes=num_classes, bert_config=bert_config, gp_layer_kwargs=gp_layer_kwargs, spec_norm_kwargs=spec_norm_kwargs, use_gp_layer=FLAGS.use_gp_layer, use_spec_norm_att=FLAGS.use_spec_norm_att, use_spec_norm_ffn=FLAGS.use_spec_norm_ffn, use_layer_norm_att=FLAGS.use_layer_norm_att, use_layer_norm_ffn=FLAGS.use_layer_norm_ffn, use_spec_norm_plr=FLAGS.use_spec_norm_plr) logging.info('Model input shape: %s', model.input_shape) logging.info('Model output shape: %s', model.output_shape) logging.info('Model number of weights: %s', model.count_params()) # Search for checkpoints from their index file; then remove the index suffix. ensemble_filenames = tf.io.gfile.glob( os.path.join(FLAGS.checkpoint_dir, '**/*.index')) ensemble_filenames = [filename[:-6] for filename in ensemble_filenames] if FLAGS.num_models > len(ensemble_filenames): raise ValueError('Number of models to be included in the ensemble ' 'should be less than total number of models in ' 'the checkpoint_dir.') ensemble_filenames = ensemble_filenames[:FLAGS.num_models] ensemble_size = len(ensemble_filenames) logging.info('Ensemble size: %s', ensemble_size) logging.info('Ensemble number of weights: %s', ensemble_size * model.count_params()) logging.info('Ensemble filenames: %s', str(ensemble_filenames)) checkpoint = tf.train.Checkpoint(model=model) # Write model predictions to files. 
num_datasets = len(test_datasets) for m, ensemble_filename in enumerate(ensemble_filenames): checkpoint.restore(ensemble_filename).assert_existing_objects_matched() for n, (dataset_name, test_dataset) in enumerate(test_datasets.items()): filename = '{dataset}_{member}.npy'.format(dataset=dataset_name, member=m) filename = os.path.join(FLAGS.output_dir, filename) if not tf.io.gfile.exists(filename): logits_list = [] test_iterator = iter(test_dataset) for step in range(steps_per_eval[dataset_name]): try: inputs = next(test_iterator) except StopIteration: continue features, labels, _ = utils.create_feature_and_label(inputs) logits = model(features, training=False) if isinstance(logits, (list, tuple)): # If model returns a tuple of (logits, covmat), extract both. logits, covmat = logits else: covmat = tf.eye(test_batch_size) if FLAGS.use_bfloat16: logits = tf.cast(logits, tf.float32) covmat = tf.cast(covmat, tf.float32) logits = ed.layers.utils.mean_field_logits( logits, covmat, mean_field_factor=FLAGS.gp_mean_field_factor_ensemble) logits_list.append(logits) logits_all = tf.concat(logits_list, axis=0) with tf.io.gfile.GFile(filename, 'w') as f: np.save(f, logits_all.numpy()) percent = (m * num_datasets + (n + 1)) / (ensemble_size * num_datasets) message = ('{:.1%} completion for prediction: ensemble member {:d}/{:d}. ' 'Dataset {:d}/{:d}'.format(percent, m + 1, ensemble_size, n + 1, num_datasets)) logging.info(message) metrics = { 'test/negative_log_likelihood': tf.keras.metrics.Mean(), 'test/auroc': tf.keras.metrics.AUC(curve='ROC'), 'test/aupr': tf.keras.metrics.AUC(curve='PR'), 'test/brier': tf.keras.metrics.MeanSquaredError(), 'test/brier_weighted': tf.keras.metrics.MeanSquaredError(), 'test/ece': um.ExpectedCalibrationError(num_bins=FLAGS.num_bins), 'test/acc': tf.keras.metrics.Accuracy(), 'test/acc_weighted': tf.keras.metrics.Accuracy(), 'test/precision': tf.keras.metrics.Precision(), 'test/recall': tf.keras.metrics.Recall(), 'test/f1': tfa_metrics.F1Score( num_classes=num_classes, average='micro', threshold=FLAGS.ece_label_threshold) } for fraction in FLAGS.fractions: metrics.update({ 'test_collab_acc/collab_acc_{}'.format(fraction): um.OracleCollaborativeAccuracy( fraction=float(fraction), num_bins=FLAGS.num_bins) }) for dataset_name, test_dataset in test_datasets.items(): if dataset_name != 'ind': metrics.update({ 'test/nll_{}'.format(dataset_name): tf.keras.metrics.Mean(), 'test/auroc_{}'.format(dataset_name): tf.keras.metrics.AUC(curve='ROC'), 'test/aupr_{}'.format(dataset_name): tf.keras.metrics.AUC(curve='PR'), 'test/brier_{}'.format(dataset_name): tf.keras.metrics.MeanSquaredError(), 'test/brier_weighted_{}'.format(dataset_name): tf.keras.metrics.MeanSquaredError(), 'test/ece_{}'.format(dataset_name): um.ExpectedCalibrationError(num_bins=FLAGS.num_bins), 'test/acc_weighted_{}'.format(dataset_name): tf.keras.metrics.Accuracy(), 'test/acc_{}'.format(dataset_name): tf.keras.metrics.Accuracy(), 'test/precision_{}'.format(dataset_name): tf.keras.metrics.Precision(), 'test/recall_{}'.format(dataset_name): tf.keras.metrics.Recall(), 'test/f1_{}'.format(dataset_name): tfa_metrics.F1Score( num_classes=num_classes, average='micro', threshold=FLAGS.ece_label_threshold) }) for fraction in FLAGS.fractions: metrics.update({ 'test_collab_acc/collab_acc_{}_{}'.format(fraction, dataset_name): um.OracleCollaborativeAccuracy( fraction=float(fraction), num_bins=FLAGS.num_bins) }) @tf.function def generate_sample_weight(labels, class_weight, label_threshold=0.7): """Generate sample weight for 
weighted accuracy calculation.""" if label_threshold != 0.7: logging.warning('The class weight was based on `label_threshold` = 0.7, ' 'and weighted accuracy/brier will be meaningless if ' '`label_threshold` is not equal to this value, which is ' 'recommended by Jigsaw Conversation AI team.') labels_int = tf.cast(labels > label_threshold, tf.int32) sample_weight = tf.gather(class_weight, labels_int) return sample_weight # Evaluate model predictions. for n, (dataset_name, test_dataset) in enumerate(test_datasets.items()): logits_dataset = [] for m in range(ensemble_size): filename = '{dataset}_{member}.npy'.format(dataset=dataset_name, member=m) filename = os.path.join(FLAGS.output_dir, filename) with tf.io.gfile.GFile(filename, 'rb') as f: logits_dataset.append(np.load(f)) logits_dataset = tf.convert_to_tensor(logits_dataset) test_iterator = iter(test_dataset) texts_list = [] logits_list = [] labels_list = [] # Use dict to collect additional labels specified by additional label names. # Here we use `OrderedDict` to get consistent ordering for this dict so # we can retrieve the predictions for each identity labels in Colab. additional_labels_dict = collections.OrderedDict() for step in range(steps_per_eval[dataset_name]): try: inputs = next(test_iterator) # type: Mapping[Text, tf.Tensor] # pytype: disable=annotation-type-mismatch except StopIteration: continue features, labels, additional_labels = ( utils.create_feature_and_label(inputs)) logits = logits_dataset[:, (step * batch_size):((step + 1) * batch_size)] loss_logits = tf.squeeze(logits, axis=-1) negative_log_likelihood = um.ensemble_cross_entropy( labels, loss_logits, binary=True) per_probs = tf.nn.sigmoid(logits) probs = tf.reduce_mean(per_probs, axis=0) # Cast labels to discrete for ECE computation ece_labels = tf.cast(labels > FLAGS.ece_label_threshold, tf.float32) one_hot_labels = tf.one_hot(tf.cast(ece_labels, tf.int32), depth=num_classes) ece_probs = tf.concat([1. 
- probs, probs], axis=1) pred_labels = tf.math.argmax(ece_probs, axis=-1) auc_probs = tf.squeeze(probs, axis=1) texts_list.append(inputs['input_ids']) logits_list.append(logits) labels_list.append(labels) if 'identity' in dataset_name: for identity_label_name in utils.IDENTITY_LABELS: if identity_label_name not in additional_labels_dict: additional_labels_dict[identity_label_name] = [] additional_labels_dict[identity_label_name].append( additional_labels[identity_label_name].numpy()) sample_weight = generate_sample_weight( labels, class_weight['test/{}'.format(dataset_name)], FLAGS.ece_label_threshold) if dataset_name == 'ind': metrics['test/negative_log_likelihood'].update_state( negative_log_likelihood) metrics['test/auroc'].update_state(labels, auc_probs) metrics['test/aupr'].update_state(labels, auc_probs) metrics['test/brier'].update_state(labels, auc_probs) metrics['test/brier_weighted'].update_state( tf.expand_dims(labels, -1), probs, sample_weight=sample_weight) metrics['test/ece'].add_batch(ece_probs, label=ece_labels) metrics['test/acc'].update_state(ece_labels, pred_labels) metrics['test/acc_weighted'].update_state( ece_labels, pred_labels, sample_weight=sample_weight) metrics['test/precision'].update_state(ece_labels, pred_labels) metrics['test/recall'].update_state(ece_labels, pred_labels) metrics['test/f1'].update_state(one_hot_labels, ece_probs) for fraction in FLAGS.fractions: metrics['test_collab_acc/collab_acc_{}'.format( fraction)].update_state(ece_labels, ece_probs) else: metrics['test/nll_{}'.format(dataset_name)].update_state( negative_log_likelihood) metrics['test/auroc_{}'.format(dataset_name)].update_state( labels, auc_probs) metrics['test/aupr_{}'.format(dataset_name)].update_state( labels, auc_probs) metrics['test/brier_{}'.format(dataset_name)].update_state( labels, auc_probs) metrics['test/brier_weighted_{}'.format(dataset_name)].update_state( tf.expand_dims(labels, -1), probs, sample_weight=sample_weight) metrics['test/ece_{}'.format(dataset_name)].add_batch( ece_probs, label=ece_labels) metrics['test/acc_{}'.format(dataset_name)].update_state( ece_labels, pred_labels) metrics['test/acc_weighted_{}'.format(dataset_name)].update_state( ece_labels, pred_labels, sample_weight=sample_weight) metrics['test/precision_{}'.format(dataset_name)].update_state( ece_labels, pred_labels) metrics['test/recall_{}'.format(dataset_name)].update_state( ece_labels, pred_labels) metrics['test/f1_{}'.format(dataset_name)].update_state( one_hot_labels, ece_probs) for fraction in FLAGS.fractions: metrics['test_collab_acc/collab_acc_{}_{}'.format( fraction, dataset_name)].update_state(ece_labels, ece_probs) texts_all = tf.concat(texts_list, axis=0) logits_all = tf.concat(logits_list, axis=1) labels_all = tf.concat(labels_list, axis=0) additional_labels_all = [] if additional_labels_dict: additional_labels_all = list(additional_labels_dict.values()) utils.save_prediction( texts_all.numpy(), path=os.path.join(FLAGS.output_dir, 'texts_{}'.format(dataset_name))) utils.save_prediction( labels_all.numpy(), path=os.path.join(FLAGS.output_dir, 'labels_{}'.format(dataset_name))) utils.save_prediction( logits_all.numpy(), path=os.path.join(FLAGS.output_dir, 'logits_{}'.format(dataset_name))) if 'identity' in dataset_name: utils.save_prediction( np.array(additional_labels_all), path=os.path.join(FLAGS.output_dir, 'additional_labels_{}'.format(dataset_name))) message = ('{:.1%} completion for evaluation: dataset {:d}/{:d}'.format( (n + 1) / num_datasets, n + 1, num_datasets)) 
logging.info(message) total_results = {name: metric.result() for name, metric in metrics.items()} # Metrics from Robustness Metrics (like ECE) will return a dict with a # single key/value, instead of a scalar. total_results = { k: (list(v.values())[0] if isinstance(v, dict) else v) for k, v in total_results.items() } logging.info('Metrics: %s', total_results)
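# The evaluation code above averages predictions in likelihood space: the MC
# NLL in the first script is -reduce_logsumexp(-ce, axis=0) + log(M) over the
# M forward passes, and um.ensemble_cross_entropy presumably applies the same
# mixture form over ensemble members. A small NumPy sketch of that identity
# (illustrative only, not part of the original scripts):
import numpy as np

def ensemble_nll(per_member_ce):
    """per_member_ce: array [num_members, batch] of per-member cross-entropies."""
    num_members = per_member_ce.shape[0]
    # -log(mean_m exp(-ce_m)) == -logsumexp_m(-ce_m) + log(M)
    return -np.logaddexp.reduce(-per_member_ce, axis=0) + np.log(num_members)

ce = np.array([[0.3, 1.2],     # member 1
               [0.5, 0.9]])    # member 2
print(ensemble_nll(ce))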
def main(argv):
  del argv  # unused arg
  tf.io.gfile.makedirs(FLAGS.output_dir)
  logging.info('Model checkpoint will be saved at %s', FLAGS.output_dir)
  tf.random.set_seed(FLAGS.seed)

  if FLAGS.use_gpu:
    logging.info('Use GPU')
    strategy = tf.distribute.MirroredStrategy()
  else:
    logging.info('Use TPU at %s',
                 FLAGS.tpu if FLAGS.tpu is not None else 'local')
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu=FLAGS.tpu)
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.TPUStrategy(resolver)

  batch_size = FLAGS.per_core_batch_size * FLAGS.num_cores
  test_batch_size = batch_size
  data_buffer_size = batch_size * 10

  train_dataset_builder = ds.WikipediaToxicityDataset(
      split='train',
      data_dir=FLAGS.in_dataset_dir,
      shuffle_buffer_size=data_buffer_size)
  ind_dataset_builder = ds.WikipediaToxicityDataset(
      split='test',
      data_dir=FLAGS.in_dataset_dir,
      shuffle_buffer_size=data_buffer_size)
  ood_dataset_builder = ds.CivilCommentsDataset(
      split='test',
      data_dir=FLAGS.ood_dataset_dir,
      shuffle_buffer_size=data_buffer_size)
  ood_identity_dataset_builder = ds.CivilCommentsIdentitiesDataset(
      split='test',
      data_dir=FLAGS.identity_dataset_dir,
      shuffle_buffer_size=data_buffer_size)

  train_dataset_builders = {
      'wikipedia_toxicity_subtypes': train_dataset_builder
  }
  test_dataset_builders = {
      'ind': ind_dataset_builder,
      'ood': ood_dataset_builder,
      'ood_identity': ood_identity_dataset_builder,
  }

  class_weight = utils.create_class_weight(train_dataset_builders,
                                           test_dataset_builders)
  logging.info('class_weight: %s', str(class_weight))

  ds_info = train_dataset_builder.tfds_info
  # Positive and negative classes.
  num_classes = ds_info.metadata['num_classes']

  train_datasets = {}
  dataset_steps_per_epoch = {}
  total_steps_per_epoch = 0
  for dataset_name, dataset_builder in train_dataset_builders.items():
    train_datasets[dataset_name] = dataset_builder.load(batch_size=batch_size)
    dataset_steps_per_epoch[dataset_name] = (
        dataset_builder.num_examples // batch_size)
    total_steps_per_epoch += dataset_steps_per_epoch[dataset_name]

  test_datasets = {}
  steps_per_eval = {}
  for dataset_name, dataset_builder in test_dataset_builders.items():
    test_datasets[dataset_name] = dataset_builder.load(
        batch_size=test_batch_size)
    steps_per_eval[dataset_name] = (
        dataset_builder.num_examples // test_batch_size)

  if FLAGS.use_bfloat16:
    policy = tf.keras.mixed_precision.experimental.Policy('mixed_bfloat16')
    tf.keras.mixed_precision.experimental.set_policy(policy)

  summary_writer = tf.summary.create_file_writer(
      os.path.join(FLAGS.output_dir, 'summaries'))

  with strategy.scope():
    logging.info('Building %s model', FLAGS.model_family)

    bert_config_dir, bert_ckpt_dir = utils.resolve_bert_ckpt_and_config_dir(
        FLAGS.bert_model_type, FLAGS.bert_dir, FLAGS.bert_config_dir,
        FLAGS.bert_ckpt_dir)
    bert_config = utils.create_config(bert_config_dir)

    model, bert_encoder = ub.models.DropoutBertBuilder(
        num_classes=num_classes,
        bert_config=bert_config,
        use_mc_dropout_mha=FLAGS.use_mc_dropout_mha,
        use_mc_dropout_att=FLAGS.use_mc_dropout_att,
        use_mc_dropout_ffn=FLAGS.use_mc_dropout_ffn,
        use_mc_dropout_output=FLAGS.use_mc_dropout_output,
        channel_wise_dropout_mha=FLAGS.channel_wise_dropout_mha,
        channel_wise_dropout_att=FLAGS.channel_wise_dropout_att,
        channel_wise_dropout_ffn=FLAGS.channel_wise_dropout_ffn)

    optimizer = utils.create_optimizer(
        FLAGS.base_learning_rate,
        steps_per_epoch=total_steps_per_epoch,
        epochs=FLAGS.train_epochs,
        warmup_proportion=FLAGS.warmup_proportion)

    logging.info('Model input shape: %s', model.input_shape)
    logging.info('Model output shape: %s', model.output_shape)
    logging.info('Model number of weights: %s', model.count_params())

    metrics = {
        'train/negative_log_likelihood': tf.keras.metrics.Mean(),
        'train/accuracy': tf.keras.metrics.Accuracy(),
        'train/accuracy_weighted': tf.keras.metrics.Accuracy(),
        'train/auroc': tf.keras.metrics.AUC(),
        'train/loss': tf.keras.metrics.Mean(),
        'train/ece': um.ExpectedCalibrationError(num_bins=FLAGS.num_bins),
        'train/precision': tf.keras.metrics.Precision(),
        'train/recall': tf.keras.metrics.Recall(),
        'train/f1': tfa_metrics.F1Score(
            num_classes=num_classes, average='micro',
            threshold=FLAGS.ece_label_threshold),
    }

    checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)
    if FLAGS.prediction_mode:
      latest_checkpoint = tf.train.latest_checkpoint(FLAGS.eval_checkpoint_dir)
    else:
      latest_checkpoint = tf.train.latest_checkpoint(FLAGS.output_dir)
    initial_epoch = 0
    if latest_checkpoint:
      # checkpoint.restore must be within a strategy.scope() so that optimizer
      # slot variables are mirrored.
      checkpoint.restore(latest_checkpoint)
      logging.info('Loaded checkpoint %s', latest_checkpoint)
      initial_epoch = optimizer.iterations.numpy() // total_steps_per_epoch
    elif FLAGS.model_family.lower() == 'bert':
      # load BERT from initial checkpoint
      bert_checkpoint = tf.train.Checkpoint(model=bert_encoder)
      bert_checkpoint.restore(bert_ckpt_dir).assert_existing_objects_matched()
      logging.info('Loaded BERT checkpoint %s', bert_ckpt_dir)

    metrics.update({
        'test/negative_log_likelihood': tf.keras.metrics.Mean(),
        'test/auroc': tf.keras.metrics.AUC(curve='ROC'),
        'test/aupr': tf.keras.metrics.AUC(curve='PR'),
        'test/brier': tf.keras.metrics.MeanSquaredError(),
        'test/brier_weighted': tf.keras.metrics.MeanSquaredError(),
        'test/ece': um.ExpectedCalibrationError(num_bins=FLAGS.num_bins),
        'test/acc': tf.keras.metrics.Accuracy(),
        'test/acc_weighted': tf.keras.metrics.Accuracy(),
        'test/eval_time': tf.keras.metrics.Mean(),
        'test/precision': tf.keras.metrics.Precision(),
        'test/recall': tf.keras.metrics.Recall(),
        'test/f1': tfa_metrics.F1Score(
            num_classes=num_classes, average='micro',
            threshold=FLAGS.ece_label_threshold),
    })
    for fraction in FLAGS.fractions:
      metrics.update({
          'test_collab_acc/collab_acc_{}'.format(fraction):
              um.OracleCollaborativeAccuracy(
                  fraction=float(fraction), num_bins=FLAGS.num_bins)
      })
    for dataset_name, test_dataset in test_datasets.items():
      if dataset_name != 'ind':
        metrics.update({
            'test/nll_{}'.format(dataset_name):
                tf.keras.metrics.Mean(),
            'test/auroc_{}'.format(dataset_name):
                tf.keras.metrics.AUC(curve='ROC'),
            'test/aupr_{}'.format(dataset_name):
                tf.keras.metrics.AUC(curve='PR'),
            'test/brier_{}'.format(dataset_name):
                tf.keras.metrics.MeanSquaredError(),
            'test/brier_weighted_{}'.format(dataset_name):
                tf.keras.metrics.MeanSquaredError(),
            'test/ece_{}'.format(dataset_name):
                um.ExpectedCalibrationError(num_bins=FLAGS.num_bins),
            'test/acc_{}'.format(dataset_name):
                tf.keras.metrics.Accuracy(),
            'test/acc_weighted_{}'.format(dataset_name):
                tf.keras.metrics.Accuracy(),
            'test/eval_time_{}'.format(dataset_name):
                tf.keras.metrics.Mean(),
            'test/precision_{}'.format(dataset_name):
                tf.keras.metrics.Precision(),
            'test/recall_{}'.format(dataset_name):
                tf.keras.metrics.Recall(),
            'test/f1_{}'.format(dataset_name):
                tfa_metrics.F1Score(
                    num_classes=num_classes, average='micro',
                    threshold=FLAGS.ece_label_threshold),
        })
        for fraction in FLAGS.fractions:
          metrics.update({
              'test_collab_acc/collab_acc_{}_{}'.format(fraction, dataset_name):
                  um.OracleCollaborativeAccuracy(
                      fraction=float(fraction), num_bins=FLAGS.num_bins)
          })

  @tf.function
  def generate_sample_weight(labels, class_weight, label_threshold=0.7):
    """Generate sample weight for weighted accuracy calculation."""
    if label_threshold != 0.7:
      logging.warning('The class weight was based on `label_threshold` = 0.7, '
                      'and weighted accuracy/brier will be meaningless if '
                      '`label_threshold` is not equal to this value, which is '
                      'recommended by Jigsaw Conversation AI team.')
    labels_int = tf.cast(labels > label_threshold, tf.int32)
    sample_weight = tf.gather(class_weight, labels_int)
    return sample_weight

  @tf.function
  def train_step(iterator, dataset_name):
    """Training StepFn."""

    def step_fn(inputs):
      """Per-Replica StepFn."""
      features, labels, _ = utils.create_feature_and_label(inputs)

      with tf.GradientTape() as tape:
        logits = model(features, training=True)
        if FLAGS.use_bfloat16:
          logits = tf.cast(logits, tf.float32)

        loss_logits = tf.squeeze(logits, axis=1)
        if FLAGS.loss_type == 'cross_entropy':
          logging.info('Using cross entropy loss')
          negative_log_likelihood = tf.nn.sigmoid_cross_entropy_with_logits(
              labels, loss_logits)
        elif FLAGS.loss_type == 'focal_cross_entropy':
          logging.info('Using focal cross entropy loss')
          negative_log_likelihood = tfa_losses.sigmoid_focal_crossentropy(
              labels,
              loss_logits,
              alpha=FLAGS.focal_loss_alpha,
              gamma=FLAGS.focal_loss_gamma,
              from_logits=True)
        elif FLAGS.loss_type == 'mse':
          logging.info('Using mean squared error loss')
          loss_probs = tf.nn.sigmoid(loss_logits)
          negative_log_likelihood = tf.keras.losses.mean_squared_error(
              labels, loss_probs)
        elif FLAGS.loss_type == 'mae':
          logging.info('Using mean absolute error loss')
          loss_probs = tf.nn.sigmoid(loss_logits)
          negative_log_likelihood = tf.keras.losses.mean_absolute_error(
              labels, loss_probs)

        negative_log_likelihood = tf.reduce_mean(negative_log_likelihood)

        l2_loss = sum(model.losses)
        loss = negative_log_likelihood + l2_loss
        # Scale the loss given the TPUStrategy will reduce sum all gradients.
        scaled_loss = loss / strategy.num_replicas_in_sync

      grads = tape.gradient(scaled_loss, model.trainable_variables)
      optimizer.apply_gradients(zip(grads, model.trainable_variables))

      probs = tf.nn.sigmoid(logits)
      # Cast labels to discrete for ECE computation.
      ece_labels = tf.cast(labels > FLAGS.ece_label_threshold, tf.float32)
      one_hot_labels = tf.one_hot(tf.cast(ece_labels, tf.int32),
                                  depth=num_classes)
      ece_probs = tf.concat([1. - probs, probs], axis=1)
      auc_probs = tf.squeeze(probs, axis=1)
      pred_labels = tf.math.argmax(ece_probs, axis=-1)

      sample_weight = generate_sample_weight(
          labels, class_weight['train/{}'.format(dataset_name)],
          FLAGS.ece_label_threshold)
      metrics['train/negative_log_likelihood'].update_state(
          negative_log_likelihood)
      metrics['train/accuracy'].update_state(labels, pred_labels)
      metrics['train/accuracy_weighted'].update_state(
          ece_labels, pred_labels, sample_weight=sample_weight)
      metrics['train/auroc'].update_state(labels, auc_probs)
      metrics['train/loss'].update_state(loss)
      metrics['train/ece'].update_state(ece_labels, ece_probs)
      metrics['train/precision'].update_state(ece_labels, pred_labels)
      metrics['train/recall'].update_state(ece_labels, pred_labels)
      metrics['train/f1'].update_state(one_hot_labels, ece_probs)

    strategy.run(step_fn, args=(next(iterator),))

  @tf.function
  def test_step(iterator, dataset_name):
    """Evaluation StepFn to log metrics."""

    def step_fn(inputs):
      """Per-Replica StepFn."""
      features, labels, _ = utils.create_feature_and_label(inputs)

      eval_start_time = time.time()
      logits = model(features, training=False)
      eval_time = (time.time() - eval_start_time) / FLAGS.per_core_batch_size

      if FLAGS.use_bfloat16:
        logits = tf.cast(logits, tf.float32)
      probs = tf.nn.sigmoid(logits)
      # Cast labels to discrete for ECE computation.
      ece_labels = tf.cast(labels > FLAGS.ece_label_threshold, tf.float32)
      one_hot_labels = tf.one_hot(tf.cast(ece_labels, tf.int32),
                                  depth=num_classes)
      ece_probs = tf.concat([1. - probs, probs], axis=1)
      pred_labels = tf.math.argmax(ece_probs, axis=-1)
      auc_probs = tf.squeeze(probs, axis=1)

      loss_logits = tf.squeeze(logits, axis=1)
      negative_log_likelihood = tf.reduce_mean(
          tf.nn.sigmoid_cross_entropy_with_logits(labels, loss_logits))

      sample_weight = generate_sample_weight(
          labels, class_weight['test/{}'.format(dataset_name)],
          FLAGS.ece_label_threshold)
      if dataset_name == 'ind':
        metrics['test/negative_log_likelihood'].update_state(
            negative_log_likelihood)
        metrics['test/auroc'].update_state(labels, auc_probs)
        metrics['test/aupr'].update_state(labels, auc_probs)
        metrics['test/brier'].update_state(labels, auc_probs)
        metrics['test/brier_weighted'].update_state(
            tf.expand_dims(labels, -1), probs, sample_weight=sample_weight)
        metrics['test/ece'].update_state(ece_labels, ece_probs)
        metrics['test/acc'].update_state(ece_labels, pred_labels)
        metrics['test/acc_weighted'].update_state(
            ece_labels, pred_labels, sample_weight=sample_weight)
        metrics['test/eval_time'].update_state(eval_time)
        metrics['test/precision'].update_state(ece_labels, pred_labels)
        metrics['test/recall'].update_state(ece_labels, pred_labels)
        metrics['test/f1'].update_state(one_hot_labels, ece_probs)
        for fraction in FLAGS.fractions:
          metrics['test_collab_acc/collab_acc_{}'.format(
              fraction)].update_state(ece_labels, ece_probs)
      else:
        metrics['test/nll_{}'.format(dataset_name)].update_state(
            negative_log_likelihood)
        metrics['test/auroc_{}'.format(dataset_name)].update_state(
            labels, auc_probs)
        metrics['test/aupr_{}'.format(dataset_name)].update_state(
            labels, auc_probs)
        metrics['test/brier_{}'.format(dataset_name)].update_state(
            labels, auc_probs)
        metrics['test/brier_weighted_{}'.format(dataset_name)].update_state(
            tf.expand_dims(labels, -1), probs, sample_weight=sample_weight)
        metrics['test/ece_{}'.format(dataset_name)].update_state(
            ece_labels, ece_probs)
        metrics['test/acc_{}'.format(dataset_name)].update_state(
            ece_labels, pred_labels)
        metrics['test/acc_weighted_{}'.format(dataset_name)].update_state(
            ece_labels, pred_labels, sample_weight=sample_weight)
        metrics['test/eval_time_{}'.format(dataset_name)].update_state(
            eval_time)
        metrics['test/precision_{}'.format(dataset_name)].update_state(
            ece_labels, pred_labels)
        metrics['test/recall_{}'.format(dataset_name)].update_state(
            ece_labels, pred_labels)
        metrics['test/f1_{}'.format(dataset_name)].update_state(
            one_hot_labels, ece_probs)
        for fraction in FLAGS.fractions:
          metrics['test_collab_acc/collab_acc_{}_{}'.format(
              fraction, dataset_name)].update_state(ece_labels, ece_probs)

    strategy.run(step_fn, args=(next(iterator),))

  @tf.function
  def final_eval_step(iterator):
    """Final Evaluation StepFn to save prediction to directory."""

    def step_fn(inputs):
      bert_features, labels, additional_labels = utils.create_feature_and_label(
          inputs)
      logits = model(bert_features, training=False)
      features = inputs['input_ids']
      return features, logits, labels, additional_labels

    (per_replica_texts, per_replica_logits, per_replica_labels,
     per_replica_additional_labels) = (
         strategy.run(step_fn, args=(next(iterator),)))

    if strategy.num_replicas_in_sync > 1:
      texts_list = tf.concat(per_replica_texts.values, axis=0)
      logits_list = tf.concat(per_replica_logits.values, axis=0)
      labels_list = tf.concat(per_replica_labels.values, axis=0)
      additional_labels_dict = {}
      for additional_label in utils.IDENTITY_LABELS:
        if additional_label in per_replica_additional_labels:
          additional_labels_dict[additional_label] = tf.concat(
              per_replica_additional_labels[additional_label], axis=0)
    else:
      texts_list = per_replica_texts
      logits_list = per_replica_logits
      labels_list = per_replica_labels
      additional_labels_dict = {}
      for additional_label in utils.IDENTITY_LABELS:
        if additional_label in per_replica_additional_labels:
          additional_labels_dict[
              additional_label] = per_replica_additional_labels[
                  additional_label]

    return texts_list, logits_list, labels_list, additional_labels_dict

  if FLAGS.prediction_mode:
    # Prediction and exit.
    for dataset_name, test_dataset in test_datasets.items():
      test_iterator = iter(test_dataset)  # pytype: disable=wrong-arg-types
      message = 'Final eval on dataset {}'.format(dataset_name)
      logging.info(message)

      texts_all = []
      logits_all = []
      labels_all = []
      additional_labels_all_dict = {}
      if 'identity' in dataset_name:
        for identity_label_name in utils.IDENTITY_LABELS:
          additional_labels_all_dict[identity_label_name] = []

      try:
        with tf.experimental.async_scope():
          for step in range(steps_per_eval[dataset_name]):
            if step % 20 == 0:
              message = 'Starting to run eval step {}/{} of dataset: {}'.format(
                  step, steps_per_eval[dataset_name], dataset_name)
              logging.info(message)
            (text_step, logits_step, labels_step,
             additional_labels_dict_step) = final_eval_step(test_iterator)

            texts_all.append(text_step)
            logits_all.append(logits_step)
            labels_all.append(labels_step)
            if 'identity' in dataset_name:
              for identity_label_name in utils.IDENTITY_LABELS:
                additional_labels_all_dict[identity_label_name].append(
                    additional_labels_dict_step[identity_label_name])
      except (StopIteration, tf.errors.OutOfRangeError):
        tf.experimental.async_clear_error()
        logging.info('Done with eval on %s', dataset_name)

      texts_all = tf.concat(texts_all, axis=0)
      logits_all = tf.concat(logits_all, axis=0)
      labels_all = tf.concat(labels_all, axis=0)
      additional_labels_all = []
      if additional_labels_all_dict:
        for identity_label_name in utils.IDENTITY_LABELS:
          additional_labels_all.append(
              tf.concat(additional_labels_all_dict[identity_label_name],
                        axis=0))
      additional_labels_all = tf.convert_to_tensor(additional_labels_all)

      utils.save_prediction(
          texts_all.numpy(),
          path=os.path.join(FLAGS.output_dir,
                            'texts_{}'.format(dataset_name)))
      utils.save_prediction(
          labels_all.numpy(),
          path=os.path.join(FLAGS.output_dir,
                            'labels_{}'.format(dataset_name)))
      utils.save_prediction(
          logits_all.numpy(),
          path=os.path.join(FLAGS.output_dir,
                            'logits_{}'.format(dataset_name)))
      if 'identity' in dataset_name:
        utils.save_prediction(
            additional_labels_all.numpy(),
            path=os.path.join(FLAGS.output_dir,
                              'additional_labels_{}'.format(dataset_name)))
      logging.info('Done with testing on %s', dataset_name)
  else:
    # Execute train / eval loop.
    start_time = time.time()
    train_iterators = {}
    for dataset_name, train_dataset in train_datasets.items():
      train_iterators[dataset_name] = iter(train_dataset)
    for epoch in range(initial_epoch, FLAGS.train_epochs):
      logging.info('Starting to run epoch: %s', epoch)
      current_step = epoch * total_steps_per_epoch
      for dataset_name, train_iterator in train_iterators.items():
        for step in range(dataset_steps_per_epoch[dataset_name]):
          train_step(train_iterator, dataset_name)

          current_step += 1
          max_steps = total_steps_per_epoch * FLAGS.train_epochs
          time_elapsed = time.time() - start_time
          steps_per_sec = float(current_step) / time_elapsed
          eta_seconds = (max_steps - current_step) / steps_per_sec
          message = ('{:.1%} completion: epoch {:d}/{:d}. {:.1f} steps/s. '
                     'ETA: {:.0f} min. Time elapsed: {:.0f} min'.format(
                         current_step / max_steps, epoch + 1,
                         FLAGS.train_epochs, steps_per_sec, eta_seconds / 60,
                         time_elapsed / 60))
          if step % 20 == 0:
            logging.info(message)

      if epoch % FLAGS.evaluation_interval == 0:
        for dataset_name, test_dataset in test_datasets.items():
          test_iterator = iter(test_dataset)  # pytype: disable=wrong-arg-types
          logging.info('Testing on dataset %s', dataset_name)

          try:
            with tf.experimental.async_scope():
              for step in range(steps_per_eval[dataset_name]):
                if step % 20 == 0:
                  logging.info('Starting to run eval step %s/%s of epoch: %s',
                               step, steps_per_eval[dataset_name], epoch)
                test_step(test_iterator, dataset_name)
          except (StopIteration, tf.errors.OutOfRangeError):
            tf.experimental.async_clear_error()
            logging.info('Done with testing on %s', dataset_name)

        logging.info('Train Loss: %.4f, AUROC: %.4f',
                     metrics['train/loss'].result(),
                     metrics['train/auroc'].result())
        logging.info('Test NLL: %.4f, AUROC: %.4f',
                     metrics['test/negative_log_likelihood'].result(),
                     metrics['test/auroc'].result())

        # record results
        total_results = {}
        for name, metric in metrics.items():
          total_results[name] = metric.result()

        with summary_writer.as_default():
          for name, result in total_results.items():
            tf.summary.scalar(name, result, step=epoch + 1)

      for name, metric in metrics.items():
        metric.reset_states()

      checkpoint_interval = min(FLAGS.checkpoint_interval, FLAGS.train_epochs)
      if checkpoint_interval > 0 and (epoch + 1) % checkpoint_interval == 0:
        checkpoint_name = checkpoint.save(
            os.path.join(FLAGS.output_dir, 'checkpoint'))
        logging.info('Saved checkpoint to %s', checkpoint_name)

    # Save model in SavedModel format on exit.
    final_save_name = os.path.join(FLAGS.output_dir, 'model')
    model.save(final_save_name)
    logging.info('Saved model to %s', final_save_name)
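# Since main(argv) reads FLAGS, it is presumably launched through absl.app.
# A minimal entry-point sketch, assuming the surrounding script defines the
# flags used above and does not already provide its own launcher:
from absl import app as absl_app

if __name__ == '__main__':
    absl_app.run(main)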
def execute_run(DATA_SET, corruption):
    global LOGGER
    encoder_structure_config, decoder_structure_config, loss_structure_config, latent_dim = create_config(
        DATA_SET)
    anomaly_ratio = -1
    ae_model = None

    config_file = 'architecture_config.yaml'
    with open(config_file, 'r') as fh:
        config = yaml.safe_load(fh)
    config = config[DATA_SET]

    burn_in_epochs = config['burn_in_epochs']
    phase_2_epochs = config['phase_2_epochs']
    phase_3_epochs = config['phase_3_epochs']
    batch_size = config['batch_size']
    ae_dropout = config['ae_dropout']
    fc_dropout = config['fc_dropout']
    anomaly_ratio = config['anomaly_ratio']
    LR = config['LR']
    max_gamma = config['max_gamma']

    ae_model = Model(
        DEVICE,
        latent_dim,
        encoder_structure_config,
        decoder_structure_config,
        loss_structure_config,
        batch_size=batch_size,
        fc_dropout=fc_dropout,
        ae_dropout=ae_dropout,
        learning_rate=LR,
        max_gamma=max_gamma,
        burn_in_epochs=burn_in_epochs,
        phase_2_epochs=phase_2_epochs,
        phase_3_epochs=phase_3_epochs,
    )
    print('Model', ae_model.network_module)

    num_anomaly_sets = 5
    pos, neg, data_dict, meta_data_df = model_data_fetcher.get_data(
        DATA_SET,
        num_anom_sets=num_anomaly_sets,
        anomaly_ratio=anomaly_ratio,
        corruption_perc=corruption)

    ae_model.train_model(pos, neg)
    test_norm_X = data_dict['test']
    auc_list = []
    ae_model.mode = 'test'

    def _normalize_(val, _min, _max):
        return (val - _min) / (_max - _min)

    for idx in range(1, num_anomaly_sets + 1):
        key = 'anom_' + str(idx)
        test_anom_df = data_dict[key]
        test_anom_X = test_anom_df.values
        x1 = test_norm_X
        x2 = test_anom_X

        x1_scores = ae_model.get_score(x1)
        x2_scores = ae_model.get_score(x2)

        res_data = []
        # Normal points are labelled 1, anomalies 0.
        labels = [1 for _ in range(x1.shape[0])] + [0 for _ in range(x2.shape[0])]
        _scores = np.concatenate([x1_scores, x2_scores], axis=0)
        for i, j in zip(_scores, labels):
            res_data.append((i[0], j))
        res_df = pd.DataFrame(res_data, columns=['score', 'label'])
        res_df = res_df.sort_values(by=['score'], ascending=True)

        # Normalize scores to [0, 1].
        _max = max(res_df['score'])
        _min = min(res_df['score'])
        res_df['score'] = res_df['score'].parallel_apply(
            _normalize_, args=(_min, _max,))

        _max = max(res_df['score'])
        _min = min(res_df['score'])

        step = (_max - _min) / 100
        # Vary the threshold
        thresh = _min + step
        thresh = round(thresh, 3)
        num_anomalies = x2.shape[0]
        print('Num anomalies', num_anomalies)

        P = []
        R = [0]
        while thresh <= _max + step:
            sel = res_df.loc[res_df['score'] <= thresh]
            if len(sel) == 0:
                thresh += step
                continue
            correct = sel.loc[sel['label'] == 0]
            prec = len(correct) / len(sel)
            rec = len(correct) / num_anomalies
            P.append(prec)
            R.append(rec)
            thresh += step
            thresh = round(thresh, 3)
        P = [P[0]] + P

        pr_auc = auc(R, P)
        try:
            plt.figure(figsize=[8, 6])
            plt.plot(R, P)
            plt.title('Precision Recall Curve || auPR :' + "{:0.4f}".format(pr_auc),
                      fontsize=15)
            plt.xlabel('Recall', fontsize=15)
            plt.ylabel('Precision', fontsize=15)
            plt.show()
        except Exception:
            pass
        print("AUC : {:0.4f} ".format(pr_auc))
        auc_list.append(pr_auc)

    _mean = np.mean(auc_list)
    _std = np.std(auc_list)
    print(' Mean AUC {:0.4f} '.format(_mean))
    print(' AUC std {:0.4f} '.format(_std))
    return _mean, _std
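# Hypothetical driver sketch: sweep a few corruption percentages for one
# dataset and collect the mean/std auPR returned by execute_run. The dataset
# name and corruption values are illustrative assumptions, not part of the
# original script.
if __name__ == '__main__':
    results = {}
    for corruption in [0, 5, 10]:  # assumed corruption percentages
        mean_aupr, std_aupr = execute_run('kddcup', corruption)
        results[corruption] = (mean_aupr, std_aupr)
    for corruption, (m, s) in results.items():
        print('corruption={}%  auPR={:0.4f} +/- {:0.4f}'.format(corruption, m, s))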
def init_app(app):
    from ansi_management import (warning, success, error)
    from utils import (create_config, runningInDocker)
    from config import Config
    from connections import tor_request

    warnings.filterwarnings('ignore')

    # Load config.ini into app
    # --------------------------------------------
    # Read Global Variables from warden.config(s)
    # Can be accessed like a dictionary like:
    # app.settings['PORTFOLIO']['RENEW_NAV']
    # --------------------------------------------
    config_file = Config.config_file
    app.warden_status = {}

    # Check for internet connection
    internet_ok = internet_connected()
    if internet_ok is True:
        print(success("✅ Internet Connection"))
    else:
        print(error("[!] WARden needs internet connection. Check your connection."))
        print(warning("[!] Exiting"))
        exit()

    # Config
    config_settings = configparser.ConfigParser()
    if os.path.isfile(config_file):
        config_settings.read(config_file)
        app.warden_status['initial_setup'] = False
        print(success("✅ Config Loaded from config.ini - edit it for customization"))
    else:
        print(error(" Config File could not be loaded, created a new one with default values..."))
        create_config(config_file)
        config_settings.read(config_file)
        app.warden_status['initial_setup'] = True

    table_error = False
    try:
        # create empty instance of LoginManager
        app.login_manager = LoginManager()
    except sqlite3.OperationalError:
        table_error = True

    # Create empty instance of SQLAlchemy
    app.db = SQLAlchemy()
    app.db.init_app(app)
    # Import models so tables are created
    from models import Trades, User, AccountInfo, TickerInfo, SpecterInfo
    app.db.create_all()

    # There was an initial error on getting users
    # probably because tables were not created yet.
    # The above create_all should have solved it so try again.
    if table_error:
        # create empty instance of LoginManager
        app.login_manager = LoginManager()

    # If login required - go to login:
    app.login_manager.login_view = "warden.login"
    # To display messages - info class (Bootstrap)
    app.login_manager.login_message_category = "secondary"
    app.login_manager.init_app(app)

    # Create empty instance of messagehandler
    from message_handler import MessageHandler
    app.message_handler = MessageHandler()
    app.message_handler.clean_all()

    # Get Version
    print("")
    try:
        version_file = Config.version_file
        with open(version_file, 'r') as file:
            current_version = file.read().replace('\n', '')
    except Exception:
        current_version = 'unknown'
    with app.app_context():
        app.version = current_version

    # Check if there are any users on database, if not, needs initial setup
    users = User.query.all()
    if users == []:
        app.warden_status['initial_setup'] = True

    # Check for Cryptocompare API Keys
    print("")
    check_cryptocompare()
    print("")

    print(f"[i] Running WARden version: {current_version}")
    app.warden_status['running_version'] = current_version

    # CHECK FOR UPGRADE
    repo_url = 'https://api.github.com/repos/pxsocs/warden/releases'
    try:
        github_version = tor_request(repo_url).json()[0]['tag_name']
    except Exception:
        github_version = None

    app.warden_status['github_version'] = github_version

    if github_version:
        print(f"[i] Newest WARden version available: {github_version}")
        parsed_github = version.parse(github_version)
        parsed_version = version.parse(current_version)

        app.warden_status['needs_upgrade'] = False
        if parsed_github > parsed_version:
            print(warning(" [i] Upgrade Available"))
            app.warden_status['needs_upgrade'] = True
        if parsed_github == parsed_version:
            print(success("✅ You are running the latest version"))
    else:
        print(warning("[!] Could not check GitHub for updates"))

    print("")

    # Check if config.ini exists
    with app.app_context():
        app.settings = config_settings
    with app.app_context():
        try:
            from utils import fxsymbol
            app.fx = fxsymbol(config_settings['PORTFOLIO']['base_fx'], 'all')
        except KeyError:
            # Problem with this config, reset
            print(error(" [!] Config File needs to be rebuilt"))
            print("")
            create_config(config_file)

    # TOR Server through Onion Address --
    # USE WITH CAUTION - ONION ADDRESSES CAN BE EXPOSED!
    # WARden needs to implement authentication (coming soon)
    if app.settings['SERVER'].getboolean('onion_server'):
        from stem.control import Controller
        from urllib.parse import urlparse
        app.tor_port = app.settings['SERVER'].getint('onion_port')
        app.port = app.settings['SERVER'].getint('port')
        from warden_modules import home_path
        toraddr_file = os.path.join(home_path(), "onion.txt")
        app.save_tor_address_to = toraddr_file
        proxy_url = "socks5h://localhost:9050"
        tor_control_port = ""
        try:
            tor_control_address = urlparse(proxy_url).netloc.split(":")[0]
            if tor_control_address == "localhost":
                tor_control_address = "127.0.0.1"
            app.controller = Controller.from_port(
                address=tor_control_address,
                port=int(tor_control_port) if tor_control_port else "default",
            )
        except Exception:
            app.controller = None
        from tor import start_hidden_service
        start_hidden_service(app)

    from routes import warden
    from errors.handlers import errors
    from api.routes import api
    from csv_routes.routes import csv_routes
    from user_routes.routes import user_routes
    from simulator.routes import simulator
    app.register_blueprint(warden)
    app.register_blueprint(errors)
    app.register_blueprint(api)
    app.register_blueprint(csv_routes)
    app.register_blueprint(user_routes)
    app.register_blueprint(simulator)

    # Prepare app to receive Specter Server info
    # For the first load, just get a saved file if available
    # The background jobs will update later
    with app.app_context():
        from specter_importer import Specter
        app.specter = Specter()
        app.specter.refresh_txs(load=True)
        app.downloading = False

    with app.app_context():
        app.runningInDocker = runningInDocker()

    with app.app_context():
        app.tor = create_tor()

    # Check if home folder exists, if not create
    home = str(Path.home())
    home_path = os.path.join(home, 'warden/')
    try:
        os.makedirs(os.path.dirname(home_path))
    except Exception:
        pass

    # Start Schedulers
    from backgroundjobs import (background_settings_update,
                                background_specter_update,
                                background_scan_network,
                                background_specter_health,
                                background_mempool_seeker)

    def bk_su():
        with app.app_context():
            background_specter_update()

    def bk_stu():
        with app.app_context():
            background_settings_update()

    def bk_scan():
        with app.app_context():
            background_scan_network()

    def bk_specter_health():
        with app.app_context():
            background_specter_health()

    def bk_mempool_health():
        with app.app_context():
            background_mempool_seeker()

    app.scheduler = BackgroundScheduler()
    app.scheduler.add_job(bk_su, 'interval', seconds=1)
    app.scheduler.add_job(bk_stu, 'interval', seconds=1)
    app.scheduler.add_job(bk_scan, 'interval', seconds=1)
    app.scheduler.add_job(bk_specter_health, 'interval', seconds=1)
    app.scheduler.add_job(bk_mempool_health, 'interval', seconds=1)

    app.scheduler.start()
    print(success("✅ Background jobs running"))
    print("")

    app.app_context().push()
    print(success("✅ Application startup is complete"))
    return app
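# Hypothetical usage of init_app inside a Flask app factory; the Flask import
# and the create_app wrapper are assumptions for illustration, not taken from
# this module.
from flask import Flask

def create_app():
    app = Flask(__name__)
    # Wires config, database, login manager, blueprints and background jobs.
    app = init_app(app)
    return app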
def should_skip():
    config = utils.create_config()
    return utils.assert_server_version(
        api_host=config.api_host,
        major=1,
        minor=4
    )
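# A sketch of how should_skip() might gate a test case with the standard
# unittest library; the test class, test name and skip reason are illustrative
# assumptions.
import unittest

@unittest.skipIf(should_skip(), 'skipped based on server version check (1.4)')
class VersionDependentTest(unittest.TestCase):

    def test_placeholder(self):
        self.assertTrue(True)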
def data_processing(DATA_PATH, ratio_list, debug, label_correct=True):
    """configuration"""
    if label_correct:
        config_path = './label_correct_config.yml'  # config loadpath
    else:
        config_path = './label_no_correct_config.yml'  # config loadpath
    create_config(config_path)
    with open(config_path, 'r') as f_obj:
        config = yaml.load(f_obj, Loader=yaml.FullLoader)
    split = config['SPLIT']
    split_num = config['SPLIT_NUM']  # final split image number is split_num^2

    if split:
        DATA_SAVE_PATH = os.path.join(DATA_PATH, 'datasets_split')  # flist savepath
    else:
        DATA_SAVE_PATH = os.path.join(DATA_PATH, 'datasets')
    IMG_SPLIT_SAVE_PATH = os.path.join(DATA_PATH, 'png_split')  # img split savepath
    EDGE_SPLIT_SAVE_PATH = os.path.join(DATA_PATH, 'edge_split')  # edge split savepath

    # save path
    create_dir(DATA_SAVE_PATH)
    if split:
        create_dir(IMG_SPLIT_SAVE_PATH)
        create_dir(EDGE_SPLIT_SAVE_PATH)

    # generate edge from points
    # time_start = time.time()
    # print(time_start)
    # if label_correct:
    #     gen_edge_from_point_base_gradient(DATA_PATH, debug)
    # else:
    #     gen_edge_from_point(DATA_PATH, debug)
    # time_end = time.time()
    # print(time_end)
    # print('generate edge from points time cost', time_end - time_start, 's')

    if debug == 0:
        subject_word = config['SUBJECT_WORD']

        # generate a list of original edge
        edge_flist_src = os.path.join(DATA_SAVE_PATH, subject_word + '_edge.flist')
        gen_flist(os.path.join(DATA_PATH, 'edge'), edge_flist_src)
        edge_num = len(np.genfromtxt(edge_flist_src, dtype=str, encoding='utf-8'))

        # generate a list of original images
        png_flist_src = os.path.join(DATA_SAVE_PATH, subject_word + '_png.flist')
        gen_flist(os.path.join(DATA_PATH, 'png'), png_flist_src)

        # img (training set, verification set, test set)(not split)
        key_name = 'png'
        png_flist = os.path.join(DATA_SAVE_PATH, subject_word + '_' + key_name)
        png_val_test_PATH = [png_flist + '_train.flist',
                             png_flist + '_val.flist',
                             png_flist + '_test.flist']
        id_list = gen_flist_train_val_test(
            png_flist_src, edge_num, png_val_test_PATH, ratio_list,
            config['SEED'], [])

        # edge (training set, verification set, test set)(not split)
        key_name = 'edge'
        edge_flist = os.path.join(DATA_SAVE_PATH, subject_word + '_' + key_name)
        edge_val_test_PATH = [edge_flist + '_train.flist',
                              edge_flist + '_val.flist',
                              edge_flist + '_test.flist']
        gen_flist_train_val_test(
            edge_flist_src, edge_num, edge_val_test_PATH, ratio_list,
            config['SEED'], id_list)

        # split data
        if split:
            key_name = 'png_split'
            png_flist = os.path.join(DATA_SAVE_PATH, subject_word + '_' + key_name)
            png_val_test_PATH_save = [png_flist + '_train.flist',
                                      png_flist + '_val.flist',
                                      png_flist + '_test.flist']
            i = 0
            id_img = 0
            for path in png_val_test_PATH:
                if ratio_list[i] != 0:
                    id_img = data_split(split_num, path, IMG_SPLIT_SAVE_PATH,
                                        'png', id_img,
                                        png_val_test_PATH_save[i], RGB=True)
                i += 1

            key_name = 'edge_split'
            png_flist = os.path.join(DATA_SAVE_PATH, subject_word + '_' + key_name)
            edge_val_test_PATH_save = [png_flist + '_train.flist',
                                       png_flist + '_val.flist',
                                       png_flist + '_test.flist']
            i = 0
            id_img = 0
            for path in edge_val_test_PATH:
                if ratio_list[i] != 0:
                    id_img = data_split(split_num, path, EDGE_SPLIT_SAVE_PATH,
                                        'edge', id_img,
                                        edge_val_test_PATH_save[i], RGB=False)
                i += 1

            png_val_test_PATH = png_val_test_PATH_save
            edge_val_test_PATH = edge_val_test_PATH_save

        """setting path of data list"""
        set_flist_config(config_path, png_val_test_PATH, flag='data')
        set_flist_config(config_path, edge_val_test_PATH, flag='edge')
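# Illustrative invocation, assuming a dataset laid out as <DATA_PATH>/png and
# <DATA_PATH>/edge; the path and the 80/10/10 train/val/test ratios are
# assumptions, not values from the original script.
if __name__ == '__main__':
    data_processing(
        DATA_PATH='./dataset/',
        ratio_list=[0.8, 0.1, 0.1],
        debug=0,
        label_correct=True)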