def __init__(self, train_dir: str, dataset: data.DataSets, **kwargs):
    self.train_dir = os.path.join(train_dir, FLAGS.rerun, self.experiment_name(**kwargs))
    self.params = EasyDict(kwargs)
    self.dataset = dataset
    self.session = None
    self.tmp = EasyDict(print_queue=[], cache=EasyDict())
    self.step = tf.train.get_or_create_global_step()
    self.ops = self.model(**kwargs)
    self.ops.update_step = tf.assign_add(self.step, FLAGS.batch)
    self.add_summaries(**kwargs)

    print(' Config '.center(80, '-'))
    print('train_dir', self.train_dir)
    print('%-32s %s' % ('Model', self.__class__.__name__))
    print('%-32s %s' % ('Dataset', dataset.name))
    for k, v in sorted(kwargs.items()):
        print('%-32s %s' % (k, v))
    print(' Model '.center(80, '-'))
    to_print = [tuple(['%s' % x for x in (v.name, np.prod(v.shape), v.shape)])
                for v in utils.model_vars(None)]
    to_print.append(('Total', str(sum(int(x[1]) for x in to_print)), ''))
    sizes = [max([len(x[i]) for x in to_print]) for i in range(3)]
    fmt = '%%-%ds %%%ds %%%ds' % tuple(sizes)
    for x in to_print[:-1]:
        print(fmt % x)
    print()
    print(fmt % to_print[-1])
    print('-' * 80)
    self._create_initial_files()
def guess_label(self, logits_y, p_data, p_model, T, use_dm, redux, **kwargs):
    del kwargs
    if redux == 'swap':
        p_model_y = tf.concat([tf.nn.softmax(x) for x in logits_y[1:] + logits_y[:1]], axis=0)
    elif redux == 'mean':
        p_model_y = sum(tf.nn.softmax(x) for x in logits_y) / len(logits_y)
        p_model_y = tf.tile(p_model_y, [len(logits_y), 1])
    elif redux == '1st':
        p_model_y = tf.nn.softmax(logits_y[0])
        p_model_y = tf.tile(p_model_y, [len(logits_y), 1])
    else:
        raise NotImplementedError()

    # Compute the target distribution.
    # 1. Rectify the distribution or not.
    if use_dm:
        p_ratio = (1e-6 + p_data) / (1e-6 + p_model)
        p_weighted = p_model_y * p_ratio
        p_weighted /= tf.reduce_sum(p_weighted, axis=1, keep_dims=True)
    else:
        p_weighted = p_model_y
    # 2. Apply sharpening.
    p_target = tf.pow(p_weighted, 1. / T)
    p_target /= tf.reduce_sum(p_target, axis=1, keep_dims=True)
    return EasyDict(p_target=p_target, p_model=p_model_y)
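# Illustrative NumPy restatement (not part of the original graph code): the target
# computation above is distribution rectification followed by temperature sharpening.
# The 1e-6 smoothing term mirrors guess_label above; the helper name and example
# values below are made up for this sketch.
import numpy as np

def guess_label_np(p_model_y, p_data, p_model, T, use_dm=True):
    if use_dm:
        # Rectify predictions toward the known/estimated class prior p_data.
        p_weighted = p_model_y * (1e-6 + p_data) / (1e-6 + p_model)
        p_weighted /= p_weighted.sum(axis=1, keepdims=True)
    else:
        p_weighted = p_model_y
    # Sharpen with temperature T, then renormalize each row.
    p_target = p_weighted ** (1. / T)
    return p_target / p_target.sum(axis=1, keepdims=True)

uniform = np.full(3, 1 / 3)
print(guess_label_np(np.array([[0.5, 0.3, 0.2]]), uniform, uniform, T=0.5))
# With a uniform prior the rectification is a no-op; sharpening yields ~[[0.658, 0.237, 0.105]].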
def model(self, batch, lr, wd, ema, warmup_pos, consistency_weight, **kwargs):
    hwc = [self.dataset.height, self.dataset.width, self.dataset.colors]
    xt_in = tf.placeholder(tf.float32, [batch] + hwc, 'xt')  # For training
    x_in = tf.placeholder(tf.float32, [None] + hwc, 'x')
    y_in = tf.placeholder(tf.float32, [batch, 2] + hwc, 'y')
    l_in = tf.placeholder(tf.int32, [batch], 'labels')
    l = tf.one_hot(l_in, self.nclass)
    wd *= lr
    warmup = tf.clip_by_value(tf.to_float(self.step) / (warmup_pos * (FLAGS.train_kimg << 10)), 0, 1)

    classifier = lambda x, **kw: self.classifier(x, **kw, **kwargs).logits
    logits_x = classifier(xt_in, training=True)
    post_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)  # Take only first call to update batch norm.
    y = tf.reshape(tf.transpose(y_in, [1, 0, 2, 3, 4]), [-1] + hwc)
    y_1, y_2 = tf.split(y, 2)
    ema = tf.train.ExponentialMovingAverage(decay=ema)
    ema_op = ema.apply(utils.model_vars())
    ema_getter = functools.partial(utils.getter_ema, ema)
    logits_y = classifier(y_1, training=True, getter=ema_getter)
    logits_teacher = tf.stop_gradient(logits_y)
    logits_student = classifier(y_2, training=True)
    loss_mt = tf.reduce_mean((tf.nn.softmax(logits_teacher) - tf.nn.softmax(logits_student)) ** 2, -1)
    loss_mt = tf.reduce_mean(loss_mt)

    loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=l, logits=logits_x)
    loss = tf.reduce_mean(loss)
    tf.summary.scalar('losses/xe', loss)
    tf.summary.scalar('losses/mt', loss_mt)

    post_ops.append(ema_op)
    post_ops.extend([tf.assign(v, v * (1 - wd))
                     for v in utils.model_vars('classify') if 'kernel' in v.name])

    train_op = tf.train.AdamOptimizer(lr).minimize(
        loss + loss_mt * warmup * consistency_weight, colocate_gradients_with_ops=True)
    with tf.control_dependencies([train_op]):
        train_op = tf.group(*post_ops)

    return EasyDict(
        xt=xt_in, x=x_in, y=y_in, label=l_in, train_op=train_op,
        classify_raw=tf.nn.softmax(classifier(x_in, training=False)),  # No EMA, for debugging.
        classify_op=tf.nn.softmax(classifier(x_in, getter=ema_getter, training=False)))
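# Illustrative sketch (not from the original code): the ExponentialMovingAverage above
# maintains teacher weights as an exponential moving average of the student weights,
# and classify_op evaluates the network with those averaged weights via the custom
# getter. A one-line restatement of that update, with made-up values:
import numpy as np

def ema_update(teacher_w, student_w, decay=0.999):
    # shadow = decay * shadow + (1 - decay) * variable
    return decay * teacher_w + (1 - decay) * student_w

print(ema_update(np.array([0.0]), np.array([1.0])))  # [0.001]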
def model(self, batch, lr, wd, ema, warmup_pos, consistency_weight, tcr_augment, **kwargs):
    hwc = [self.dataset.height, self.dataset.width, self.dataset.colors]
    xt_in = tf.placeholder(tf.float32, [batch] + hwc, 'xt')  # For training
    x_in = tf.placeholder(tf.float32, [None] + hwc, 'x')
    y_in = tf.placeholder(tf.float32, [batch, 2] + hwc, 'y')  # The unlabeled data
    l_in = tf.placeholder(tf.int32, [batch], 'labels')
    l = tf.one_hot(l_in, self.nclass)

    warmup = tf.clip_by_value(tf.to_float(self.step) / (warmup_pos * (FLAGS.train_kimg << 10)), 0, 1)
    lrate = tf.clip_by_value(tf.to_float(self.step) / (FLAGS.train_kimg << 10), 0, 1)
    lr *= tf.cos(lrate * (7 * np.pi) / (2 * 8))
    tf.summary.scalar('monitors/lr', lr)

    # Labeled data.
    classifier = lambda x, **kw: self.classifier(x, **kw, **kwargs).logits
    logits_x = classifier(xt_in, training=True)
    post_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)  # Take only first call to update batch norm.

    # Unlabeled data.
    classifier_embedding = lambda x, **kw: self.classifier(x, **kw, **kwargs).embeds
    y = tf.reshape(tf.transpose(y_in, [1, 0, 2, 3, 4]), [-1] + hwc)
    y_delta = self.augment(y, tcr_augment=tcr_augment)  # Apply tcr_augment
    y_1, y_2 = tf.split(y, 2)
    y_1_delta, y_2_delta = tf.split(y_delta, 2)
    embeds_y_1 = classifier_embedding(y_1, training=True)
    embeds_y_1_delta = classifier_embedding(y_1_delta, training=True)
    embeds_y_2 = classifier_embedding(y_2, training=True)
    embeds_y_2_delta = classifier_embedding(y_2_delta, training=True)

    loss_tcr = tf.losses.mean_squared_error((y_1_delta - y_1), (y_2_delta - y_2))
    loss_tcr = tf.reduce_mean(loss_tcr)
    tf.summary.scalar('losses/xeu', loss_tcr)

    loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=l, logits=logits_x)
    loss = tf.reduce_mean(loss)
    tf.summary.scalar('losses/xe', loss)

    # L2 regularization
    loss_wd = sum(tf.nn.l2_loss(v) for v in utils.model_vars('classify') if 'kernel' in v.name)
    tf.summary.scalar('losses/wd', loss_wd)

    ema = tf.train.ExponentialMovingAverage(decay=ema)
    ema_op = ema.apply(utils.model_vars())
    ema_getter = functools.partial(utils.getter_ema, ema)
    post_ops.append(ema_op)

    train_op = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True).minimize(
        loss + loss_tcr * warmup * consistency_weight + wd * loss_wd,
        colocate_gradients_with_ops=True)
    with tf.control_dependencies([train_op]):
        train_op = tf.group(*post_ops)

    return EasyDict(
        xt=xt_in, x=x_in, y=y_in, label=l_in, train_op=train_op,
        classify_raw=tf.nn.softmax(classifier(x_in, training=False)),  # No EMA, for debugging.
        classify_op=tf.nn.softmax(classifier(x_in, getter=ema_getter, training=False)))
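# Illustrative sketch (not from the original code): the learning-rate schedule above
# follows a cosine from lr down to lr * cos(7*pi/16) ~= 0.195 * lr as training
# progresses from 0% to 100% of FLAGS.train_kimg << 10 images. Example progress
# values are made up.
import numpy as np

progress = np.array([0.0, 0.5, 1.0])              # fraction of training completed
scale = np.cos(progress * (7 * np.pi) / (2 * 8))  # matches lr *= tf.cos(...) above
print(scale.round(3))                             # [1.    0.773 0.195]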
def model(self, batch, lr, wd, ema, **kwargs):
    hwc = [self.dataset.height, self.dataset.width, self.dataset.colors]
    xt_in = tf.placeholder(tf.float32, [batch] + hwc, 'xt')  # For training
    x_in = tf.placeholder(tf.float32, [None] + hwc, 'x')
    y_in = tf.placeholder(tf.float32, [batch] + hwc, 'y')
    l_in = tf.placeholder(tf.int32, [batch], 'labels')
    wd *= lr
    classifier = lambda x, **kw: self.classifier(x, **kw, **kwargs).logits

    def get_logits(x):
        logits = classifier(x, training=True)
        return logits

    x, labels_x = self.augment(xt_in, tf.one_hot(l_in, self.nclass), **kwargs)
    logits_x = get_logits(x)
    post_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    y, labels_y = self.augment(y_in, tf.nn.softmax(get_logits(y_in)), **kwargs)
    labels_y = tf.stop_gradient(labels_y)
    logits_y = get_logits(y)

    loss_xe = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels_x, logits=logits_x)
    loss_xe = tf.reduce_mean(loss_xe)
    loss_xeu = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels_y, logits=logits_y)
    loss_xeu = tf.reduce_mean(loss_xeu)
    tf.summary.scalar('losses/xe', loss_xe)
    tf.summary.scalar('losses/xeu', loss_xeu)

    ema = tf.train.ExponentialMovingAverage(decay=ema)
    ema_op = ema.apply(utils.model_vars())
    ema_getter = functools.partial(utils.getter_ema, ema)
    post_ops.append(ema_op)
    post_ops.extend([tf.assign(v, v * (1 - wd))
                     for v in utils.model_vars('classify') if 'kernel' in v.name])

    train_op = tf.train.AdamOptimizer(lr).minimize(
        loss_xe + loss_xeu, colocate_gradients_with_ops=True)
    with tf.control_dependencies([train_op]):
        train_op = tf.group(*post_ops)

    return EasyDict(
        xt=xt_in, x=x_in, y=y_in, label=l_in, train_op=train_op,
        classify_raw=tf.nn.softmax(classifier(x_in, training=False)),  # No EMA, for debugging.
        classify_op=tf.nn.softmax(classifier(x_in, getter=ema_getter, training=False)))
def guess_label(self, y, classifier, T, **kwargs):
    del kwargs
    logits_y = [classifier(yi, training=True) for yi in y]
    logits_y = tf.concat(logits_y, 0)
    # Compute predicted probability distribution py.
    p_model_y = tf.reshape(tf.nn.softmax(logits_y), [len(y), -1, self.nclass])
    p_model_y = tf.reduce_mean(p_model_y, axis=0)
    # Compute the target distribution.
    p_target = tf.pow(p_model_y, 1. / T)
    p_target /= tf.reduce_sum(p_target, axis=1, keep_dims=True)
    return EasyDict(p_target=p_target, p_model=p_model_y)
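# Illustrative sketch (not from the original code): the guessed label above is the
# softmax prediction averaged over the augmented copies of each image, then sharpened
# by temperature T. The example values below are made up.
import numpy as np

def sharpen(p, T):
    p = p ** (1. / T)
    return p / p.sum(axis=1, keepdims=True)

p_aug = np.array([[[0.6, 0.3, 0.1]],
                  [[0.4, 0.4, 0.2]]])  # (n_augmentations, batch, nclass)
p_model = p_aug.mean(axis=0)           # average over augmentations: [[0.5, 0.35, 0.15]]
print(sharpen(p_model, T=0.5))         # sharper target: ~[[0.63, 0.31, 0.06]]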
def classifier(self, x, scales, filters, training, getter=None, **kwargs):
    del kwargs
    assert scales == 3  # Only specified for 32x32 inputs.
    conv_args = dict(kernel_size=3, activation=tf.nn.leaky_relu, padding='same')
    bn_args = dict(training=training, momentum=0.999)
    with tf.variable_scope('classify', reuse=tf.AUTO_REUSE, custom_getter=getter):
        y = tf.layers.conv2d((x - self.dataset.mean) / self.dataset.std, filters, **conv_args)
        y = tf.layers.batch_normalization(y, **bn_args)
        y = tf.layers.conv2d(y, filters, **conv_args)
        y = tf.layers.batch_normalization(y, **bn_args)
        y = tf.layers.conv2d(y, filters, **conv_args)
        y = tf.layers.batch_normalization(y, **bn_args)
        y = tf.layers.max_pooling2d(y, 2, 2)
        y = tf.layers.conv2d(y, 2 * filters, **conv_args)
        y = tf.layers.batch_normalization(y, **bn_args)
        y = tf.layers.conv2d(y, 2 * filters, **conv_args)
        y = tf.layers.batch_normalization(y, **bn_args)
        y = tf.layers.conv2d(y, 2 * filters, **conv_args)
        y = tf.layers.batch_normalization(y, **bn_args)
        y = tf.layers.max_pooling2d(y, 2, 2)
        y = tf.layers.conv2d(y, 4 * filters, kernel_size=3, activation=tf.nn.leaky_relu, padding='valid')
        y = tf.layers.batch_normalization(y, **bn_args)
        y = tf.layers.conv2d(y, 2 * filters, kernel_size=1, activation=tf.nn.leaky_relu, padding='same')
        y = tf.layers.batch_normalization(y, **bn_args)
        y = tf.layers.conv2d(y, 1 * filters, kernel_size=1, activation=tf.nn.leaky_relu, padding='same')
        y = tf.layers.batch_normalization(y, **bn_args)
        y = tf.reduce_mean(y, [1, 2])  # (b, 6, 6, 128) -> (b, 128)
        logits = tf.layers.dense(y, self.nclass)
    return EasyDict(logits=logits, embeds=y)
def _load_stl10():
    def unflatten(images):
        return np.transpose(images.reshape((-1, 3, 96, 96)), [0, 3, 2, 1])

    with tempfile.NamedTemporaryFile() as f:
        if tf.gfile.Exists('stl10/stl10_binary.tar.gz'):
            f = tf.gfile.Open('stl10/stl10_binary.tar.gz', 'rb')
        else:
            request.urlretrieve(URLS['stl10'], f.name)
        tar = tarfile.open(fileobj=f)
        train_X = tar.extractfile('stl10_binary/train_X.bin')
        train_y = tar.extractfile('stl10_binary/train_y.bin')
        test_X = tar.extractfile('stl10_binary/test_X.bin')
        test_y = tar.extractfile('stl10_binary/test_y.bin')
        unlabeled_X = tar.extractfile('stl10_binary/unlabeled_X.bin')
        train_set = {'images': np.frombuffer(train_X.read(), dtype=np.uint8),
                     'labels': np.frombuffer(train_y.read(), dtype=np.uint8) - 1}
        test_set = {'images': np.frombuffer(test_X.read(), dtype=np.uint8),
                    'labels': np.frombuffer(test_y.read(), dtype=np.uint8) - 1}
        _imgs = np.frombuffer(unlabeled_X.read(), dtype=np.uint8)
        unlabeled_set = {'images': _imgs,
                         'labels': np.zeros(100000, dtype=np.uint8)}
        fold_indices = tar.extractfile('stl10_binary/fold_indices.txt').read()

    train_set['images'] = _encode_png(unflatten(train_set['images']))
    test_set['images'] = _encode_png(unflatten(test_set['images']))
    unlabeled_set['images'] = _encode_png(unflatten(unlabeled_set['images']))
    return dict(train=train_set, test=test_set, unlabeled=unlabeled_set,
                files=[EasyDict(filename="stl10_fold_indices.txt", data=fold_indices)])
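# Shape check for the unflatten helper above (illustrative only): the STL-10 binary
# format stores each image channel-first with rows and columns swapped relative to
# NHWC, hence the [0, 3, 2, 1] transpose rather than the usual [0, 2, 3, 1]. The
# dummy buffer below is made up.
import numpy as np

dummy = np.zeros(2 * 3 * 96 * 96, dtype=np.uint8)  # two flattened images
nhwc = np.transpose(dummy.reshape((-1, 3, 96, 96)), [0, 3, 2, 1])
print(nhwc.shape)  # (2, 96, 96, 3)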
def model(self, batch, lr, wd, ema, warmup_pos, vat, vat_eps, entmin_weight, **kwargs):
    hwc = [self.dataset.height, self.dataset.width, self.dataset.colors]
    xt_in = tf.placeholder(tf.float32, [batch] + hwc, 'xt')  # For training
    x_in = tf.placeholder(tf.float32, [None] + hwc, 'x')
    y_in = tf.placeholder(tf.float32, [batch] + hwc, 'y')
    l_in = tf.placeholder(tf.int32, [batch], 'labels')
    wd *= lr
    warmup = tf.clip_by_value(tf.to_float(self.step) / (warmup_pos * (FLAGS.train_kimg << 10)), 0, 1)

    classifier = lambda x, **kw: self.classifier(x, **kw, **kwargs).logits
    l = tf.one_hot(l_in, self.nclass)

    logits_x = classifier(xt_in, training=True)
    post_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)  # Take only first call to update batch norm.
    logits_y = classifier(y_in, training=True)
    delta_y = vat_utils.generate_perturbation(y_in, logits_y, lambda x: classifier(x, training=True), vat_eps)
    logits_student = classifier(y_in + delta_y, training=True)
    logits_teacher = tf.stop_gradient(logits_y)
    loss_vat = layers.kl_divergence_from_logits(logits_student, logits_teacher)
    loss_vat = tf.reduce_mean(loss_vat)
    loss_entmin = tf.reduce_mean(tf.distributions.Categorical(logits=logits_y).entropy())

    loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=l, logits=logits_x)
    loss = tf.reduce_mean(loss)
    tf.summary.scalar('losses/xe', loss)
    tf.summary.scalar('losses/vat', loss_vat)
    tf.summary.scalar('losses/entmin', loss_entmin)

    ema = tf.train.ExponentialMovingAverage(decay=ema)
    ema_op = ema.apply(utils.model_vars())
    ema_getter = functools.partial(utils.getter_ema, ema)
    post_ops.append(ema_op)
    post_ops.extend([tf.assign(v, v * (1 - wd))
                     for v in utils.model_vars('classify') if 'kernel' in v.name])

    train_op = tf.train.AdamOptimizer(lr).minimize(
        loss + loss_vat * warmup * vat + entmin_weight * loss_entmin,
        colocate_gradients_with_ops=True)
    with tf.control_dependencies([train_op]):
        train_op = tf.group(*post_ops)

    return EasyDict(
        xt=xt_in, x=x_in, y=y_in, label=l_in, train_op=train_op,
        classify_raw=tf.nn.softmax(classifier(x_in, training=False)),  # No EMA, for debugging.
        classify_op=tf.nn.softmax(classifier(x_in, getter=ema_getter, training=False)))
def classifier(self, x, scales, filters, repeat, training, getter=None, dropout=0, **kwargs):
    del kwargs
    leaky_relu = functools.partial(tf.nn.leaky_relu, alpha=0.1)
    bn_args = dict(training=training, momentum=0.999)

    def conv_args(k, f):
        return dict(padding='same',
                    kernel_initializer=tf.random_normal_initializer(stddev=tf.rsqrt(0.5 * k * k * f)))

    def residual(x0, filters, stride=1, activate_before_residual=False):
        x = leaky_relu(tf.layers.batch_normalization(x0, **bn_args))
        if activate_before_residual:
            x0 = x
        x = tf.layers.conv2d(x, filters, 3, strides=stride, **conv_args(3, filters))
        x = leaky_relu(tf.layers.batch_normalization(x, **bn_args))
        x = tf.layers.conv2d(x, filters, 3, **conv_args(3, filters))
        if x0.get_shape()[3] != filters:
            x0 = tf.layers.conv2d(x0, filters, 1, strides=stride, **conv_args(1, filters))
        return x0 + x

    with tf.variable_scope('classify', reuse=tf.AUTO_REUSE, custom_getter=getter):
        y = tf.layers.conv2d((x - self.dataset.mean) / self.dataset.std, 16, 3, **conv_args(3, 16))
        for scale in range(scales):
            y = residual(y, filters << scale, stride=2 if scale else 1,
                         activate_before_residual=scale == 0)
            for i in range(repeat - 1):
                y = residual(y, filters << scale)
        y = leaky_relu(tf.layers.batch_normalization(y, **bn_args))
        y = embeds = tf.reduce_mean(y, [1, 2])
        if dropout and training:
            y = tf.nn.dropout(y, 1 - dropout)
        logits = tf.layers.dense(y, self.nclass, kernel_initializer=tf.glorot_normal_initializer())
    return EasyDict(logits=logits, embeds=embeds)
import functools
import itertools
import multiprocessing
import random

import numpy as np
import tensorflow as tf
from absl import flags

from libml import utils, ctaugment
from libml.utils import EasyDict
from third_party.auto_augment import augmentations, policies

FLAGS = flags.FLAGS
POOL = None
POLICIES = EasyDict(fixmatch_train=policies.cifar10_policies())
RANDOM_POLICY_OPS = (
    'Identity', 'AutoContrast', 'Equalize', 'Rotate', 'Solarize', 'Color',
    'Contrast', 'Brightness', 'Sharpness', 'ShearX', 'TranslateX', 'TranslateY',
    'Posterize', 'ShearY'
)
AUGMENT_ENUM = 'd x m aa aac ra rac'.split() + [
    'r%d_%d_%d' % (nops, mag, cutout)
    for nops, mag, cutout in itertools.product(range(1, 5), range(1, 16), range(0, 100, 25))
] + ['rac%d' % (mag) for mag in range(1, 10)]

flags.DEFINE_integer('K', 1, 'Number of strong augmentation for unlabeled data.')
flags.DEFINE_enum(
    'augment', 'd.d',
    [x + '.' + y for x, y in itertools.product(AUGMENT_ENUM, AUGMENT_ENUM)] +
    [x + '.' + y + '.' + z for x, y, z in itertools.product(AUGMENT_ENUM, AUGMENT_ENUM, AUGMENT_ENUM)] + [
def model(self, batch, lr, wd, beta, w_kl, w_match, w_rot, K, use_xe,
          warmup_kimg=1024, T=0.5, mixmode='xxy.yxy', dbuf=128, ema=0.999, **kwargs):
    hwc = [self.dataset.height, self.dataset.width, self.dataset.colors]
    xt_in = tf.placeholder(tf.float32, [batch] + hwc, 'xt')  # For training
    x_in = tf.placeholder(tf.float32, [None] + hwc, 'x')
    y_in = tf.placeholder(tf.float32, [batch, K + 1] + hwc, 'y')
    l_in = tf.placeholder(tf.int32, [batch], 'labels')
    wd *= lr
    w_match *= tf.clip_by_value(tf.cast(self.step, tf.float32) / (warmup_kimg << 10), 0, 1)
    augment = layers.MixMode(mixmode)
    gpu = utils.get_gpu()

    def classifier_to_gpu(x, **kw):
        with tf.device(next(gpu)):
            return self.classifier(x, **kw, **kwargs).logits

    def random_rotate(x):
        b4 = batch // 4
        x, xt = x[:2 * b4], tf.transpose(x[2 * b4:], [0, 2, 1, 3])
        l = np.zeros(b4, np.int32)
        l = tf.constant(np.concatenate([l, l + 1, l + 2, l + 3], axis=0))
        return tf.concat([x[:b4], x[b4:, ::-1, ::-1], xt[:b4, ::-1], xt[b4:, :, ::-1]], axis=0), l

    # Moving average of the current estimated label distribution
    p_model = layers.PMovingAverage('p_model', self.nclass, dbuf)
    p_target = layers.PMovingAverage('p_target', self.nclass, dbuf)  # Rectified distribution (only for plotting)
    # Known (or inferred) true unlabeled distribution
    p_data = layers.PData(self.dataset)

    if w_rot > 0:
        rot_y, rot_l = random_rotate(y_in[:, 1])
        with tf.device(next(gpu)):
            rot_logits = self.classifier_rot(self.classifier(rot_y, training=True, **kwargs).embeds)
        loss_rot = tf.nn.softmax_cross_entropy_with_logits_v2(labels=tf.one_hot(rot_l, 4), logits=rot_logits)
        loss_rot = tf.reduce_mean(loss_rot)
        tf.summary.scalar('losses/rot', loss_rot)
    else:
        loss_rot = 0

    if kwargs['redux'] == '1st' and w_kl <= 0:
        logits_y = [classifier_to_gpu(y_in[:, 0], training=True)] * (K + 1)
    elif kwargs['redux'] == '1st':
        logits_y = [classifier_to_gpu(y_in[:, i], training=True) for i in range(2)]
        logits_y += logits_y[:1] * (K - 1)
    else:
        logits_y = [classifier_to_gpu(y_in[:, i], training=True) for i in range(K + 1)]

    guess = self.guess_label(logits_y, p_data(), p_model(), T=T, **kwargs)
    ly = tf.stop_gradient(guess.p_target)

    if w_kl > 0:
        w_kl *= tf.clip_by_value(tf.cast(self.step, tf.float32) / (warmup_kimg << 10), 0, 1)
        loss_kl = tf.nn.softmax_cross_entropy_with_logits_v2(labels=ly[:batch], logits=logits_y[1])
        loss_kl = tf.reduce_mean(loss_kl)
        tf.summary.scalar('losses/kl', loss_kl)
    else:
        loss_kl = 0
    del logits_y

    lx = tf.one_hot(l_in, self.nclass)
    xy, labels_xy = augment([xt_in] + [y_in[:, i] for i in range(K + 1)],
                            [lx] + tf.split(ly, K + 1), [beta, beta])
    x, y = xy[0], xy[1:]
    labels_x, labels_y = labels_xy[0], tf.concat(labels_xy[1:], 0)
    del xy, labels_xy

    batches = layers.interleave([x] + y, batch)
    logits = [classifier_to_gpu(yi, training=True) for yi in batches[:-1]]
    skip_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    logits.append(classifier_to_gpu(batches[-1], training=True))
    post_ops = [v for v in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if v not in skip_ops]
    logits = layers.interleave(logits, batch)
    logits_x = logits[0]
    logits_y = tf.concat(logits[1:], 0)
    del batches, logits

    loss_xe = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels_x, logits=logits_x)
    loss_xe = tf.reduce_mean(loss_xe)
    if use_xe:
        loss_xeu = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels_y, logits=logits_y)
    else:
        loss_xeu = tf.square(labels_y - tf.nn.softmax(logits_y))
    loss_xeu = tf.reduce_mean(loss_xeu)
    tf.summary.scalar('losses/xe', loss_xe)
    tf.summary.scalar('losses/%s' % ('xeu' if use_xe else 'l2u'), loss_xeu)
    self.distribution_summary(p_data(), p_model(), p_target())

    ema = tf.train.ExponentialMovingAverage(decay=ema)
    ema_op = ema.apply(utils.model_vars())
    ema_getter = functools.partial(utils.getter_ema, ema)
    post_ops.extend([ema_op,
                     p_model.update(guess.p_model),
                     p_target.update(guess.p_target)])
    if p_data.has_update:
        post_ops.append(p_data.update(lx))
    post_ops.extend([tf.assign(v, v * (1 - wd))
                     for v in utils.model_vars('classify') if 'kernel' in v.name])

    train_op = tf.train.AdamOptimizer(lr).minimize(
        loss_xe + w_kl * loss_kl + w_match * loss_xeu + w_rot * loss_rot,
        colocate_gradients_with_ops=True)
    with tf.control_dependencies([train_op]):
        train_op = tf.group(*post_ops)

    return EasyDict(
        xt=xt_in, x=x_in, y=y_in, label=l_in, train_op=train_op,
        classify_op=tf.nn.softmax(classifier_to_gpu(x_in, getter=ema_getter, training=False)),
        classify_raw=tf.nn.softmax(classifier_to_gpu(x_in, training=False)))  # No EMA, for debugging.
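# Note on the weight-decay pattern used above and in several of the other models:
# `wd *= lr` at the top plus the `tf.assign(v, v * (1 - wd))` post-ops apply weight
# decay decoupled from the Adam update, shrinking each kernel by (1 - wd * lr) after
# every optimizer step. An illustrative sketch of that update rule, assuming a plain
# gradient step for simplicity (helper name and values are made up):
import numpy as np

def decoupled_wd_step(w, grad, lr, wd):
    w = w - lr * grad         # optimizer step (Adam in the code above)
    return w * (1 - wd * lr)  # post-op kernel shrink

w = np.array([1.0, -2.0])
print(decoupled_wd_step(w, grad=np.zeros(2), lr=0.002, wd=0.02))
# [ 0.99996 -1.99992]: weights decay even when the gradient is zero.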
import itertools
import multiprocessing
import random

import numpy as np
import tensorflow as tf
from absl import flags

from libml import utils, ctaugment
from libml.utils import EasyDict
from third_party.auto_augment import augmentations, policies

FLAGS = flags.FLAGS
POOL = None
POLICIES = EasyDict(cifar10=policies.cifar10_policies(),
                    cifar100=policies.cifar10_policies(),
                    svhn=policies.svhn_policies(),
                    svhn_noextra=policies.svhn_policies())
RANDOM_POLICY_OPS = ('Identity', 'AutoContrast', 'Equalize', 'Rotate', 'Solarize', 'Color',
                     'Contrast', 'Brightness', 'Sharpness', 'ShearX', 'TranslateX', 'TranslateY',
                     'Posterize', 'ShearY')
AUGMENT_ENUM = 'd x m aa aac ra rac'.split() + [
    'r%d_%d_%d' % (nops, mag, cutout)
    for nops, mag, cutout in itertools.product(range(1, 5), range(1, 16), range(0, 100, 25))
] + ['rac%d' % (mag) for mag in range(1, 10)]

flags.DEFINE_integer('K', 1, 'Number of strong augmentation for unlabeled data.')
flags.DEFINE_enum(
import itertools
import multiprocessing
import random

import numpy as np
import tensorflow as tf
from absl import flags

from libml import utils, ctaugment
from libml.utils import EasyDict
from third_party.auto_augment import augmentations, policies

FLAGS = flags.FLAGS
POOL = None
POLICIES = EasyDict(cifar10=policies.cifar10_policies(),
                    svhn=policies.svhn_policies(),
                    svhn_noextra=policies.svhn_policies())
RANDOM_POLICY_OPS = (
    'Identity', 'AutoContrast', 'Equalize', 'Rotate', 'Solarize', 'Color',
    'Contrast', 'Brightness', 'Sharpness', 'ShearX', 'TranslateX', 'TranslateY',
    'Posterize', 'ShearY'
)
AUGMENT_ENUM = 'd x m aa'.split() + [
    'r%d_%d_%d' % (nops, mag, cutout)
    for nops, mag, cutout in itertools.product(range(1, 5), range(1, 16), range(0, 100, 25))
]

flags.DEFINE_integer('K', 1, 'Number of strong augmentation for unlabeled data.')
flags.DEFINE_enum(
    'augment', 'd.d',
    [x + '.' + y for x, y in itertools.product(AUGMENT_ENUM, AUGMENT_ENUM)] +
    [x + '.' + y + '.' + z for x, y, z in itertools.product(AUGMENT_ENUM, 'dx', AUGMENT_ENUM)],
import itertools

import numpy as np
import tensorflow as tf
from absl import flags

from libml import utils, ctaugment
from libml.utils import EasyDict
from third_party.auto_augment import augmentations, policies

FLAGS = flags.FLAGS
POOL = None
POLICIES = EasyDict(
    cifar10=policies.cifar10_policies(),
    cifar10p=policies.cifar10_policies(),
    # color=policies.color_policies(),
    cifar10imb=policies.cifar10_policies(),
    cifar100=policies.cifar10_policies(),
    svhn=policies.svhn_policies(),
    svhnp=policies.svhn_policies(),
    svhnp_noextra=policies.svhn_policies(),
    svhn_noextra=policies.svhn_policies())
RANDOM_POLICY_OPS = ('Identity', 'AutoContrast', 'Equalize', 'Rotate', 'Solarize', 'Color',
                     'Contrast', 'Brightness', 'Sharpness', 'ShearX', 'TranslateX', 'TranslateY',
                     'Posterize', 'ShearY')
AUGMENT_ENUM = 'd x m aa aac aacc ra rac'.split() + [
    'r%d_%d_%d' % (nops, mag, cutout)
    for nops, mag, cutout in itertools.product(range(1, 5), range(1, 16), range(0, 100, 25))
] + ['rac%d' % (mag) for mag in range(1, 10)]
def model(self, batch, lr, wd, ema, warmup_pos, consistency_weight, threshold, **kwargs):
    hwc = [self.dataset.height, self.dataset.width, self.dataset.colors]
    xt_in = tf.placeholder(tf.float32, [batch] + hwc, 'xt')  # For training
    x_in = tf.placeholder(tf.float32, [None] + hwc, 'x')
    y_in = tf.placeholder(tf.float32, [batch] + hwc, 'y')
    l_in = tf.placeholder(tf.int32, [batch], 'labels')
    l = tf.one_hot(l_in, self.nclass)

    warmup = tf.clip_by_value(tf.to_float(self.step) / (warmup_pos * (FLAGS.train_kimg << 10)), 0, 1)
    lrate = tf.clip_by_value(tf.to_float(self.step) / (FLAGS.train_kimg << 10), 0, 1)
    lr *= tf.cos(lrate * (7 * np.pi) / (2 * 8))
    tf.summary.scalar('monitors/lr', lr)

    classifier = lambda x, **kw: self.classifier(x, **kw, **kwargs).logits
    logits_x = classifier(xt_in, training=True)
    post_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)  # Take only first call to update batch norm.
    logits_y = classifier(y_in, training=True)
    # Get the pseudo-label loss
    loss_pl = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.argmax(logits_y, axis=-1), logits=logits_y)
    # Masks denoting which data points have high-confidence predictions,
    # shape [batch] so it multiplies loss_pl element-wise.
    greater_than_thresh = tf.reduce_any(
        tf.greater(tf.nn.softmax(logits_y), threshold), axis=-1)
    greater_than_thresh = tf.cast(greater_than_thresh, loss_pl.dtype)
    # Only enforce the loss when the model is confident
    loss_pl *= greater_than_thresh
    # Note that we also average over examples without confident outputs;
    # this is consistent with the realistic evaluation codebase
    loss_pl = tf.reduce_mean(loss_pl)

    loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=l, logits=logits_x)
    loss = tf.reduce_mean(loss)
    tf.summary.scalar('losses/xe', loss)
    tf.summary.scalar('losses/pl', loss_pl)

    # L2 regularization
    loss_wd = sum(tf.nn.l2_loss(v) for v in utils.model_vars('classify') if 'kernel' in v.name)
    tf.summary.scalar('losses/wd', loss_wd)

    ema = tf.train.ExponentialMovingAverage(decay=ema)
    ema_op = ema.apply(utils.model_vars())
    ema_getter = functools.partial(utils.getter_ema, ema)
    post_ops.append(ema_op)

    train_op = tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True).minimize(
        loss + loss_pl * warmup * consistency_weight + wd * loss_wd,
        colocate_gradients_with_ops=True)
    with tf.control_dependencies([train_op]):
        train_op = tf.group(*post_ops)

    return EasyDict(
        xt=xt_in, x=x_in, y=y_in, label=l_in, train_op=train_op,
        classify_raw=tf.nn.softmax(classifier(x_in, training=False)),  # No EMA, for debugging.
        classify_op=tf.nn.softmax(classifier(x_in, getter=ema_getter, training=False)))
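# Illustrative sketch (not from the original code) of the confidence mask above:
# tf.reduce_any(softmax > threshold, axis=-1) keeps an example exactly when its
# highest predicted probability exceeds the threshold. Example values are made up.
import numpy as np

def pseudo_label_mask(probs, threshold):
    return (probs.max(axis=-1) > threshold).astype(np.float32)

probs = np.array([[0.97, 0.02, 0.01],   # confident: contributes to loss_pl
                  [0.40, 0.35, 0.25]])  # not confident: masked out
print(pseudo_label_mask(probs, threshold=0.95))  # [1. 0.]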
def classifier(self, x, scales, filters, repeat, training, getter=None, dropout=0, **kwargs):
    del kwargs
    bn_args = dict(training=training, momentum=0.999)

    def conv_args(k, f):
        return dict(padding='same', use_bias=False,
                    kernel_initializer=tf.random_normal_initializer(stddev=tf.rsqrt(0.5 * k * k * f)))

    def residual(x0, filters, stride=1):
        def branch():
            x = tf.nn.relu(x0)
            x = tf.layers.conv2d(x, filters, 3, strides=stride, **conv_args(3, filters))
            x = tf.nn.relu(tf.layers.batch_normalization(x, **bn_args))
            x = tf.layers.conv2d(x, filters, 3, **conv_args(3, filters))
            x = tf.layers.batch_normalization(x, **bn_args)
            return x

        x = layers.shakeshake(branch(), branch(), training)

        if stride == 2:
            x1 = tf.layers.conv2d(tf.nn.relu(x0[:, ::2, ::2]), filters >> 1, 1, **conv_args(1, filters >> 1))
            x2 = tf.layers.conv2d(tf.nn.relu(x0[:, 1::2, 1::2]), filters >> 1, 1, **conv_args(1, filters >> 1))
            x0 = tf.concat([x1, x2], axis=3)
            x0 = tf.layers.batch_normalization(x0, **bn_args)
        elif x0.get_shape()[3] != filters:
            x0 = tf.layers.conv2d(x0, filters, 1, **conv_args(1, filters))
            x0 = tf.layers.batch_normalization(x0, **bn_args)

        return x0 + x

    with tf.variable_scope('classify', reuse=tf.AUTO_REUSE, custom_getter=getter):
        y = tf.layers.conv2d((x - self.dataset.mean) / self.dataset.std, 16, 3, **conv_args(3, 16))
        for scale, i in itertools.product(range(scales), range(repeat)):
            with tf.variable_scope('layer%d.%d' % (scale + 1, i)):
                if i == 0:
                    y = residual(y, filters << scale, stride=2 if scale else 1)
                else:
                    y = residual(y, filters << scale)
        y = embeds = tf.reduce_mean(y, [1, 2])
        if dropout and training:
            y = tf.nn.dropout(y, 1 - dropout)
        logits = tf.layers.dense(y, self.nclass, kernel_initializer=tf.glorot_normal_initializer())
    return EasyDict(logits=logits, embeds=embeds)
def model(self, batch, lr, wd, ema, beta, w_match,
          warmup_kimg=1024, nu=2, mixmode='xxy.yxy', dbuf=128, **kwargs):
    hwc = [self.dataset.height, self.dataset.width, self.dataset.colors]
    xt_in = tf.placeholder(tf.float32, [batch] + hwc, 'xt')  # For training
    x_in = tf.placeholder(tf.float32, [None] + hwc, 'x')
    y_in = tf.placeholder(tf.float32, [batch, nu] + hwc, 'y')
    l_in = tf.placeholder(tf.int32, [batch], 'labels')
    wd *= lr
    w_match *= tf.clip_by_value(tf.cast(self.step, tf.float32) / (warmup_kimg << 10), 0, 1)
    augment = MixMode(mixmode)
    classifier = lambda x, **kw: self.classifier(x, **kw, **kwargs).logits

    # Moving average of the current estimated label distribution
    p_model = layers.PMovingAverage('p_model', self.nclass, dbuf)
    p_target = layers.PMovingAverage('p_target', self.nclass, dbuf)  # Rectified distribution (only for plotting)
    # Known (or inferred) true unlabeled distribution
    p_data = layers.PData(self.dataset)

    y = tf.reshape(tf.transpose(y_in, [1, 0, 2, 3, 4]), [-1] + hwc)
    guess = self.guess_label(tf.split(y, nu), classifier, T=0.5, **kwargs)
    ly = tf.stop_gradient(guess.p_target)
    lx = tf.one_hot(l_in, self.nclass)
    xy, labels_xy = augment([xt_in] + tf.split(y, nu), [lx] + [ly] * nu, [beta, beta])
    x, y = xy[0], xy[1:]
    labels_x, labels_y = labels_xy[0], tf.concat(labels_xy[1:], 0)
    del xy, labels_xy

    batches = layers.interleave([x] + y, batch)
    skip_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    logits = [classifier(batches[0], training=True)]
    post_ops = [v for v in tf.get_collection(tf.GraphKeys.UPDATE_OPS) if v not in skip_ops]
    for batchi in batches[1:]:
        logits.append(classifier(batchi, training=True))
    logits = layers.interleave(logits, batch)
    logits_x = logits[0]
    logits_y = tf.concat(logits[1:], 0)

    loss_xe = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels_x, logits=logits_x)
    loss_xe = tf.reduce_mean(loss_xe)
    loss_l2u = tf.square(labels_y - tf.nn.softmax(logits_y))
    loss_l2u = tf.reduce_mean(loss_l2u)
    tf.summary.scalar('losses/xe', loss_xe)
    tf.summary.scalar('losses/l2u', loss_l2u)
    self.distribution_summary(p_data(), p_model(), p_target())

    ema = tf.train.ExponentialMovingAverage(decay=ema)
    ema_op = ema.apply(utils.model_vars())
    ema_getter = functools.partial(utils.getter_ema, ema)
    post_ops.extend([ema_op,
                     p_model.update(guess.p_model),
                     p_target.update(guess.p_target)])
    if p_data.has_update:
        post_ops.append(p_data.update(lx))
    post_ops.extend([tf.assign(v, v * (1 - wd))
                     for v in utils.model_vars('classify') if 'kernel' in v.name])

    train_op = tf.train.AdamOptimizer(lr).minimize(
        loss_xe + w_match * loss_l2u, colocate_gradients_with_ops=True)
    with tf.control_dependencies([train_op]):
        train_op = tf.group(*post_ops)

    return EasyDict(
        xt=xt_in, x=x_in, y=y_in, label=l_in, train_op=train_op,
        classify_raw=tf.nn.softmax(classifier(x_in, training=False)),  # No EMA, for debugging.
        classify_op=tf.nn.softmax(classifier(x_in, getter=ema_getter, training=False)))