Code example #1
    def model_fn(features, labels, mode):
        """Inception_Resnet_V2 model body.
        Support single host, one or more GPU training. Parameter distribution can
        be either one of the following scheme.
        1. CPU is the parameter server and manages gradient updates.
        2. Parameters are distributed evenly across all GPUs, and the first GPU
        manages gradient updates.
        Args:
        features: a list of tensors, one for each tower
        labels: a list of tensors, one for each tower
        mode: ModeKeys.TRAIN or EVAL
        Returns:
        A EstimatorSpec object.
        """
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        tower_features = features
        tower_labels = labels
        tower_losses = []
        tower_gradvars = []
        tower_preds = []

        # channels first (NCHW) is normally optimal on GPU and channels last (NHWC)
        # on CPU. The exception is Intel MKL on CPU which is optimal with
        # channels_last.
        data_format = None
        if not data_format:
            if GPU_COUNT == 0:
                data_format = 'channels_last'
            else:
                data_format = 'channels_first'

        if GPU_COUNT == 0:
            num_devices = 1
            device_type = 'cpu'
        else:
            num_devices = GPU_COUNT
            device_type = 'gpu'

        for i in range(num_devices):
            worker_device = '/{}:{}'.format(device_type, i)
            if VARIABLE_STRATEGY == 'CPU':
                device_setter = utils.local_device_setter(
                    worker_device=worker_device)
            elif VARIABLE_STRATEGY == 'GPU':
                device_setter = utils.local_device_setter(
                    ps_device_type='gpu',
                    worker_device=worker_device,
                    ps_strategy=tf.contrib.training.GreedyLoadBalancingStrategy(
                        GPU_COUNT, tf.contrib.training.byte_size_load_fn))
            with tf.variable_scope('', reuse=bool(i != 0)):
                with tf.name_scope('tower_%d' % i) as name_scope:
                    with tf.device(device_setter):
                        loss, gradvars, preds = tower_fn(is_training, tower_features[i],
                                                         tower_labels and tower_labels[i], num_classes)
                        tower_losses.append(loss)
                        tower_gradvars.append(gradvars)
                        tower_preds.append(preds)
                        if i == 0:
                            # Only trigger batch_norm moving mean and variance update from
                            # the 1st tower. Ideally, we should grab the updates from all
                            # towers but these stats accumulate extremely fast so we can
                            # ignore the other stats from the other towers without
                            # significant detriment.
                            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                                           name_scope)
        if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
            # Now compute global loss and gradients.
            gradvars = []
            with tf.name_scope('gradient_averaging'):
                all_grads = {}
                for grad, var in itertools.chain(*tower_gradvars):
                    if grad is not None:
                        all_grads.setdefault(var, []).append(grad)
                for var, grads in six.iteritems(all_grads):
                    # Average gradients on the same device as the variables
                    # to which they apply.
                    with tf.device(var.device):
                        if len(grads) == 1:
                            avg_grad = grads[0]
                        else:
                            avg_grad = tf.multiply(
                                tf.add_n(grads), 1. / len(grads))
                    gradvars.append((avg_grad, var))

            # Device that runs the ops to apply global gradient updates.
            consolidation_device = '/gpu:0' if VARIABLE_STRATEGY == 'GPU' else '/cpu:0'
            with tf.device(consolidation_device):
                loss = tf.reduce_mean(tower_losses, name='loss')

                examples_sec_hook = utils.ExamplesPerSecondHook(
                    BATCH_SIZE, every_n_steps=10)

                global_step = tf.train.get_global_step()

                learning_rate = tf.constant(LEARNING_RATE)

                tensors_to_log = {'learning_rate': learning_rate, 'loss': loss}

                logging_hook = tf.train.LoggingTensorHook(
                    tensors=tensors_to_log, every_n_iter=100)

                initializer_hook = utils.IteratorInitializerHook()

                train_hooks = [initializer_hook, logging_hook, examples_sec_hook]

                optimizer = tf.train.MomentumOptimizer(
                    learning_rate=LEARNING_RATE, momentum=MOMENTUM)

                # Create single grouped train op
                train_op = [
                    optimizer.apply_gradients(gradvars, global_step=global_step)
                ]
                train_op.extend(update_ops)
                train_op = tf.group(*train_op)

                predictions = {
                    'classes':
                        tf.concat([p['classes'] for p in tower_preds], axis=0),
                    'probabilities':
                        tf.concat([p['probabilities']
                                for p in tower_preds], axis=0)
                }
                stacked_labels = tf.concat(labels, axis=0)
                metrics = {
                    'accuracy':
                        tf.metrics.accuracy(stacked_labels, predictions['classes'])
                }

            return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions,
                loss=loss,
                train_op=train_op,
                training_hooks=train_hooks,
                eval_metric_ops=metrics)
        else:
            predictions = {
                'classes':
                    tf.concat([p['classes'] for p in tower_preds], axis=0),
                'probabilities':
                    tf.concat([p['probabilities']
                            for p in tower_preds], axis=0),
                'features': tf.concat([feature for feature in features], axis=0)
            }
            return tf.estimator.EstimatorSpec(
                mode=mode,
                predictions=predictions)
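Note: the model_fn above expects features and labels to be lists with one tensor per tower, so the input_fn has to shard each batch before handing it to the Estimator. Below is a minimal wiring sketch; GPU_COUNT, BATCH_SIZE and make_dataset() are illustrative placeholders, not part of the original code.

# Hypothetical wiring sketch for the model_fn above (TF 1.x Estimator API).
# GPU_COUNT, BATCH_SIZE and make_dataset() are assumed placeholders.
import tensorflow as tf

def input_fn():
    dataset = make_dataset().batch(BATCH_SIZE).prefetch(1)   # assumed helper
    images, labels = dataset.make_one_shot_iterator().get_next()
    # One shard per tower, matching what model_fn expects.
    num_shards = max(GPU_COUNT, 1)
    return (tf.split(images, num_shards, axis=0),
            tf.split(labels, num_shards, axis=0))

estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir='/tmp/example')
estimator.train(input_fn=input_fn, steps=1000)
estimator.evaluate(input_fn=input_fn, steps=100)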
Code example #2
File: models.py  Project: xiaoyubing/StrokeBasedRNN
def model_signature(input_placeholder, mode, params):
    features = input_placeholder.x
    labels = input_placeholder.y
    length = input_placeholder.length
    len_per_stroke = input_placeholder.len_per_stroke
    is_training = input_placeholder.is_training
    strokes_features = input_placeholder.strokes_features
    global_features = input_placeholder.global_features

    loss_function = _loss_2logits
    # loss_function = _loss_siamese

    losses_all_tower = []
    stroke_losses_all_tower = []
    distance_all_tower = []
    prediction_all_tower = []
    if params.stroke_base:
        features = tf.reshape(features,
                              [-1, params.length_per_signature, params.length_per_stroke, params.features])
        len_stroke_tower = tf.split(len_per_stroke, params.num_gpus, axis=0)
    else:
        features = tf.reshape(
            features, [-1, params.max_sequence_length, params.features])
    features = tf.cast(features, tf.float32)
    input_all_tower = tf.split(features, params.num_gpus, axis=0)
    length_all_tower = tf.split(length, params.num_gpus, axis=0)
    strokes_features_tower = tf.split(
        strokes_features, params.num_gpus, axis=0)
    global_features_tower = tf.split(
        global_features, params.num_gpus, axis=0)

    labels_all_tower = [None] * params.num_gpus
    if labels is not None:
        labels = tf.cast(labels, tf.float32)
        labels = tf.reshape(labels, [-1])
        labels_all_tower = tf.split(labels, params.num_gpus, axis=0)

    for i in range(params.num_gpus):
        worker_device = '/{}:{}'.format('gpu', i)
        input_tower = input_all_tower[i]

        device_setter = utils.local_device_setter(
            ps_device_type='gpu',
            worker_device=worker_device,
            ps_strategy=tf.contrib.training.GreedyLoadBalancingStrategy(
                params.num_gpus, tf.contrib.training.byte_size_load_fn))
        with tf.device(device_setter):
            len_stroke = None if not params.stroke_base else len_stroke_tower[i]
            loss, stroke_loss, distance, prediction = loss_function(input_tower, labels_all_tower[i],
                                                                    length_all_tower[i], len_stroke,
                                                                    strokes_features_tower[i],
                                                                    global_features_tower[i],
                                                                    params, is_training=is_training)
            if labels_all_tower[i] is not None:
                losses_all_tower.append(loss)
            if stroke_loss is not None:
                stroke_losses_all_tower.append(stroke_loss)
            distance_all_tower.append(distance)
            prediction_all_tower.append(prediction)

    consolidation_device = '/cpu:0'
    with tf.device(consolidation_device):
        distance = tf.concat(distance_all_tower, 0)
        distance = tf.reshape(distance, [-1, 1])
        prediction = tf.concat(prediction_all_tower, 0)
        prediction = tf.reshape(prediction, [-1, 1])
        if mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {'distance': distance}
            return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

        loss = tf.reduce_mean(losses_all_tower, 0)
        stroke_loss = tf.reduce_mean(stroke_losses_all_tower, 0)
        distance_norm = _normlize_distance(distance)
        labels = tf.reshape(labels, [-1, 1])

        accuracy_ops = tf.metrics.accuracy(labels, prediction)

        labels_2value = tf.where(
            tf.equal(labels, 2.0), tf.zeros_like(labels), labels)
        labels_2value = tf.reshape(labels_2value, [-1, 1])
        labels_reversal = tf.reshape(tf.subtract(tf.cast(1.0, tf.float32), labels_2value),
                                     [-1, 1])  # labels_ = !labels;

        positive_distance = tf.reduce_mean(
            tf.multiply(labels_2value, distance))
        negative_distance = tf.reduce_mean(
            tf.multiply(labels_reversal, distance))
        loss_summary = tf.summary.scalar('loss', loss)
        stroke_loss_summary = tf.summary.scalar('stroke_loss', stroke_loss)
        pos_summary = tf.summary.scalar('positive_distance', positive_distance)
        neg_summary = tf.summary.scalar('negative_distance', negative_distance)

        metric_ops = tf.metrics.auc(
            labels_reversal, distance_norm, name='auc_all')
        auc_summary = tf.summary.scalar('auc', metric_ops[1])
        accuracy_summary = tf.summary.scalar('accuracy', accuracy_ops[1])

        sec_at_spe_metric = tf.metrics.sensitivity_at_specificity(
            labels_reversal, distance_norm, 0.90)

        merged_summary = tf.summary.merge(
            [loss_summary, stroke_loss_summary, pos_summary, neg_summary, auc_summary, accuracy_summary])

        if mode == tf.estimator.ModeKeys.EVAL:
            eval_metric_ops = {'evaluation_auc': metric_ops,
                               'accuracy': accuracy_ops,
                               'sec_at_spe': sec_at_spe_metric}
            return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=eval_metric_ops)

        else:
            return loss, stroke_loss, distance, accuracy_ops[1], merged_summary
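Every example here delegates device placement to utils.local_device_setter, whose implementation is not shown. The sketch below, loosely modeled on the device setter from TensorFlow's cifar10_estimator sample, shows the usual idea: return a device function for tf.device that pins variable ops to parameter devices (picked by ps_strategy) and leaves everything else on the tower's worker device. The real helper in these projects may differ.

# Rough sketch of a local device setter; not the project's actual utils code.
def local_device_setter(worker_device='/cpu:0', ps_device_type='cpu',
                        ps_strategy=None):
    ps_ops = ['Variable', 'VariableV2', 'VarHandleOp']
    if ps_strategy is None:
        ps_strategy = lambda op: 0        # everything on a single ps device

    def _device_chooser(op):
        # Variable ops go to a parameter device chosen by ps_strategy ...
        if op.node_def.op in ps_ops:
            return '/{}:{}'.format(ps_device_type, ps_strategy(op))
        # ... all other ops stay on the tower's worker device.
        return worker_device

    return _device_chooser

# Usage: with tf.device(local_device_setter(worker_device='/gpu:0')): ...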
Code example #3
    def _hg_model_fn(features, labels, mode, params):
        """ HG model body.

    Support single host, one or more GPU training. Parameter distribution can
    be either one of the following scheme.
    1. CPU is the parameter server and manages gradient updates.
    2. Parameters are distributed evenly across all GPUs, and the first GPU
       manages gradient updates.

    Args:
      features: a list of tensors, one for each tower
      labels: a list of tensors, one for each tower
      mode: ModeKeys.TRAIN or EVAL
      params: Hyperparameters suitable for tuning
    Returns:
      A EstimatorSpec object.
    """
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        weight_decay = params.weight_decay
        momentum = params.momentum
        decay_factor = params.decay_factor
        decay_step = params.decay_step
        init_learning_rate = params.init_learning_rate
        num_stacks = params.num_stacks
        num_joints = params.num_joints

        tower_features = features
        if mode == tf.estimator.ModeKeys.PREDICT:
            if num_gpus < 1:
                tower_labels = [None]
            else:
                tower_labels = [None for i in range(num_gpus)]
        else:
            tower_labels = labels

        tower_losses = []
        tower_gradvars = []
        tower_preds = []

        # channels first (NCHW) is normally optimal on GPU and channels last (NHWC)
        # on CPU. The exception is Intel MKL on CPU which is optimal with
        # channels_last.
        data_format = params.data_format
        if not data_format:
            if num_gpus == 0:
                data_format = 'channels_last'
            else:
                data_format = 'channels_first'

        if num_gpus == 0:
            num_devices = 1
            device_type = 'cpu'
        else:
            num_devices = num_gpus
            device_type = 'gpu'

        for i in range(num_devices):
            worker_device = '/{}:{}'.format(device_type, i)
            if variable_strategy == 'CPU':
                device_setter = utils.local_device_setter(
                    worker_device=worker_device)
            elif variable_strategy == 'GPU':
                device_setter = utils.local_device_setter(
                    ps_device_type='gpu',
                    worker_device=worker_device,
                    ps_strategy=tf.contrib.training.
                    GreedyLoadBalancingStrategy(
                        num_gpus, tf.contrib.training.byte_size_load_fn))
            if mode == tf.estimator.ModeKeys.TRAIN:
                batch_size = params.train_batch_size / num_devices
            else:
                batch_size = params.eval_batch_size / num_devices

            with tf.variable_scope('hg', reuse=bool(i != 0)):
                with tf.name_scope('tower_%d' % i) as name_scope:
                    with tf.device(device_setter):
                        loss, gradvars, preds = _tower_fn(
                            mode, weight_decay, tower_features[i][0],
                            tower_labels[i], data_format,
                            params.batch_norm_decay, params.batch_norm_epsilon,
                            params.num_stacks, params.num_out, params.n_low,
                            params.num_joints, batch_size, params.seq_length)
                        tower_losses.append(loss)
                        tower_gradvars.append(gradvars)
                        tower_preds.append(preds)
                        if i == 0:
                            # Only trigger batch_norm moving mean and variance update from
                            # the 1st tower. Ideally, we should grab the updates from all
                            # towers but these stats accumulate extremely fast so we can
                            # ignore the other stats from the other towers without
                            # significant detriment.
                            update_ops = tf.get_collection(
                                tf.GraphKeys.UPDATE_OPS, name_scope)

        if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:

            # Now compute global loss and gradients.
            gradvars = []
            with tf.name_scope('gradient_averaging'):
                all_grads = {}
                for grad, var in itertools.chain(*tower_gradvars):
                    if grad is not None:
                        all_grads.setdefault(var, []).append(grad)
                for var, grads in six.iteritems(all_grads):
                    # Average gradients on the same device as the variables
                    # to which they apply.
                    with tf.device(var.device):
                        if len(grads) == 1:
                            avg_grad = grads[0]
                        else:
                            avg_grad = tf.multiply(tf.add_n(grads),
                                                   1. / len(grads))
                    gradvars.append((avg_grad, var))

            # Device that runs the ops to apply global gradient updates.
            consolidation_device = '/gpu:0' if variable_strategy == 'GPU' else '/cpu:0'
            with tf.device(consolidation_device):

                learning_rate = tf.train.exponential_decay(
                    init_learning_rate,
                    tf.train.get_global_step(),
                    decay_step,
                    decay_factor,
                    staircase=True,
                    name='learning_rate')

                loss = tf.reduce_mean(tower_losses, name='loss')

                examples_sec_hook = utils.ExamplesPerSecondHook(
                    params.train_batch_size, every_n_steps=10)

                tensors_to_log = {'learning_rate': learning_rate, 'loss': loss}

                logging_hook = tf.train.LoggingTensorHook(
                    tensors=tensors_to_log, every_n_iter=100)

                train_hooks = [logging_hook, examples_sec_hook]

                optimizer = tf.train.RMSPropOptimizer(
                    learning_rate=learning_rate)

                if params.sync:
                    optimizer = tf.train.SyncReplicasOptimizer(
                        optimizer, replicas_to_aggregate=num_workers)
                    sync_replicas_hook = optimizer.make_session_run_hook(
                        params.is_chief)
                    train_hooks.append(sync_replicas_hook)

                # Create single grouped train op
                train_op = [
                    optimizer.apply_gradients(
                        gradvars, global_step=tf.train.get_global_step())
                ]

                train_op.extend(update_ops)
                train_op = tf.group(*train_op)

                predictions = {
                    'heatmaps':
                    tf.concat([p['heatmaps'] for p in tower_preds], axis=0),
                    'images':
                    tf.concat([i for i in tower_features], axis=0)
                }
                if mode == tf.estimator.ModeKeys.EVAL:
                    hm = predictions['heatmaps']
                    stacked_labels = tf.concat(labels[0][0][0], axis=0)

                    gt_labels = tf.transpose(stacked_labels, [1, 0, 3, 4, 2])

                    joint_accur = []
                    for j in range(params.seq_length):
                        for i in range(params.num_joints):
                            joint_accur.append(
                                _pck_hm(hm[j, :, -1, :, :,
                                           i], gt_labels[j, :, :, :, i],
                                        params.eval_batch_size / num_devices))
                    accuracy = tf.stack(joint_accur)
                    metrics = {'Mean Pixel Error': tf.metrics.mean(accuracy)}
                    tf.logging.info('Accuracy op computed')
                else:
                    metrics = None

        else:
            train_op = None
            loss = None
            train_hooks = None
            metrics = None
            predictions = {
                'heatmaps': tf.concat([p['heatmaps'] for p in tower_preds],
                                      axis=0),
                'images': tf.concat([i for i in tower_features], axis=0)
            }

        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          loss=loss,
                                          train_op=train_op,
                                          training_hooks=train_hooks,
                                          eval_metric_ops=metrics)
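A side note on the schedule used above: with staircase=True, tf.train.exponential_decay multiplies the learning rate by decay_factor once every decay_step steps instead of decaying continuously. A quick standalone check of that behaviour (the numbers below are made up, not the project's hyperparameters):

# Toy check of an exponential decay schedule with staircase behaviour.
init_learning_rate, decay_factor, decay_step = 0.001, 0.5, 10000

def decayed_lr(global_step):
    # staircase=True corresponds to flooring the exponent.
    return init_learning_rate * decay_factor ** (global_step // decay_step)

print(decayed_lr(0))       # 0.001
print(decayed_lr(9999))    # 0.001
print(decayed_lr(10000))   # 0.0005
print(decayed_lr(25000))   # 0.00025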
Code example #4
File: main.py  Project: greenfigo2015/test
    def _model_fn(features, labels, mode, params):
        """Resnet model body.

    Support single host, one or more GPU training. Parameter distribution can
    be either one of the following scheme.
    1. CPU is the parameter server and manages gradient updates.
    2. Parameters are distributed evenly across all GPUs, and the first GPU
       manages gradient updates.

    Args:
      features: a list of tensors, one for each tower
      labels: a list of tensors, one for each tower
      mode: ModeKeys.TRAIN or EVAL
      params: Hyperparameters suitable for tuning
    Returns:
      A EstimatorSpec object.
    """
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        weight_decay = params.weight_decay
        momentum = params.momentum

        tower_features = features
        tower_labels = labels
        tower_losses = []
        tower_gradvars = []
        tower_preds = []

        # channels first (NCHW) is normally optimal on GPU and channels last (NHWC)
        # on CPU. The exception is Intel MKL on CPU which is optimal with
        # channels_last.
        data_format = params.data_format
        if not data_format:
            if num_gpus == 0:
                data_format = 'channels_last'
            else:
                data_format = 'channels_first'

        if num_gpus == 0:
            num_devices = 1
            device_type = 'cpu'
        else:
            num_devices = num_gpus
            device_type = 'gpu'

        for i in range(num_devices):
            worker_device = '/{}:{}'.format(device_type, i)
            if variable_strategy == 'CPU':
                device_setter = utils.local_device_setter(
                    worker_device=worker_device)
            elif variable_strategy == 'GPU':
                device_setter = utils.local_device_setter(
                    ps_device_type='gpu',
                    worker_device=worker_device,
                    ps_strategy=tf.contrib.training.
                    GreedyLoadBalancingStrategy(
                        num_gpus, tf.contrib.training.byte_size_load_fn))
            with tf.variable_scope(params.model_name, reuse=bool(i != 0)):
                with tf.name_scope('tower_%d' % i) as name_scope:
                    with tf.device(device_setter):
                        loss, gradvars, preds = _tower_fn(
                            is_training, params.dp_keep_prob, weight_decay,
                            tower_features[i], tower_labels[i], data_format,
                            params.num_layers, params.batch_norm_decay,
                            params.batch_norm_epsilon, params)
                        tower_losses.append(loss)
                        tower_gradvars.append(gradvars)
                        tower_preds.append(preds)
                        if i == 0:
                            # Only trigger batch_norm moving mean and variance update from
                            # the 1st tower. Ideally, we should grab the updates from all
                            # towers but these stats accumulate extremely fast so we can
                            # ignore the other stats from the other towers without
                            # significant detriment.
                            update_ops = tf.get_collection(
                                tf.GraphKeys.UPDATE_OPS, name_scope)

        # Now compute global loss and gradients.
        gradvars = []
        with tf.name_scope('gradient_averaging'):
            all_grads = {}
            for grad, var in itertools.chain(*tower_gradvars):
                if grad is not None:
                    all_grads.setdefault(var, []).append(grad)
            for var, grads in six.iteritems(all_grads):
                # Average gradients on the same device as the variables
                # to which they apply.
                with tf.device(var.device):
                    if len(grads) == 1:
                        avg_grad = grads[0]
                    else:
                        avg_grad = tf.multiply(tf.add_n(grads),
                                               1. / len(grads))
                gradvars.append((avg_grad, var))

        # Device that runs the ops to apply global gradient updates.
        consolidation_device = '/gpu:0' if variable_strategy == 'GPU' else '/cpu:0'
        with tf.device(consolidation_device):
            # Suggested learning rate scheduling from
            # https://github.com/ppwwyyxx/tensorpack/blob/master/examples/ResNet/cifar10-resnet.py#L155
            num_batches_per_epoch = imagenet.ImageNetDataSet.num_examples_per_epoch(
                'train') // (params.train_batch_size * num_workers)
            boundaries = [
                num_batches_per_epoch * x
                for x in np.array([30, 60, 90], dtype=np.int64)
            ]
            staged_lr = [
                params.learning_rate * x for x in [1, 0.1, 0.01, 0.002]
            ]

            learning_rate = tf.train.piecewise_constant(
                tf.train.get_global_step(), boundaries, staged_lr)

            loss = tf.reduce_mean(tower_losses, name='loss')

            examples_sec_hook = utils.ExamplesPerSecondHook(
                params.train_batch_size, every_n_steps=10)

            #optimizer = tf.train.MomentumOptimizer(
            #    learning_rate=learning_rate, momentum=momentum)
            optimizer = tf.train.AdamOptimizer()

            # Build the hook list before the sync branch so the sync-replicas
            # hook (appended below) is not referenced before assignment or lost.
            train_hooks = [examples_sec_hook]

            if params.sync:
                optimizer = tf.train.SyncReplicasOptimizer(
                    optimizer, replicas_to_aggregate=num_workers)
                sync_replicas_hook = optimizer.make_session_run_hook(
                    params.is_chief)
                train_hooks.append(sync_replicas_hook)

            # Create single grouped train op
            train_op = [
                optimizer.apply_gradients(
                    gradvars, global_step=tf.train.get_global_step())
            ]
            train_op.extend(update_ops)
            train_op = tf.group(*train_op)

            predictions = {
                'classes':
                tf.concat([p['classes'] for p in tower_preds], axis=0),
                'probabilities':
                tf.concat([p['probabilities'] for p in tower_preds], axis=0)
            }
            stacked_labels = tf.concat(labels, axis=0)
            metrics = {
                'accuracy':
                tf.metrics.accuracy(stacked_labels, predictions['classes'])
            }
            tensors_to_log = {
                'learning_rate': learning_rate,
                'loss': loss,
                'acc': metrics['accuracy'][0]
            }
            logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                                      every_n_iter=100)
            train_hooks.append(logging_hook)

        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          loss=loss,
                                          train_op=train_op,
                                          training_hooks=train_hooks,
                                          eval_metric_ops=metrics)
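One detail worth calling out in the snippet above: tf.train.piecewise_constant expects one more value than boundaries, which is why staged_lr has four entries for the three epoch boundaries (30, 60, 90). A self-contained TF 1.x illustration of the semantics, with made-up boundaries and values:

# Standalone illustration of tf.train.piecewise_constant (TF 1.x);
# boundaries and values here are arbitrary.
import tensorflow as tf

step = tf.placeholder(tf.int32, [])
lr = tf.train.piecewise_constant(step,
                                 boundaries=[1000, 2000],
                                 values=[0.1, 0.01, 0.001])

with tf.Session() as sess:
    print(sess.run(lr, {step: 500}))    # 0.1
    print(sess.run(lr, {step: 1500}))   # 0.01
    print(sess.run(lr, {step: 2500}))   # 0.001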
Code example #5
def model_fn_signature(features, labels, mode, params):
    """Model function for tf.estimator

    Args:
        features: input batch of images
        labels:True or not
        mode: can be one of tf.estimator.ModeKeys.{TRAIN, EVAL }
        params: contains hyper parameters of the model (ex: `params.learning_rate`)

    Returns:
        model_spec: tf.estimator.EstimatorSpec object
    """
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    loss_function = models[params.model]

    losses_all_tower = []
    distance_all_tower = []
    images_all_tower = tf.split(features, params.num_gpus, axis=0)
    labels_all_tower = None
    if labels is not None:
        labels = tf.reshape(labels, [-1])
        labels_all_tower = tf.split(labels, params.num_gpus, axis=0)

    for i in range(params.num_gpus):
        worker_device = '/{}:{}'.format('gpu', i)
        images_tower = images_all_tower[i]

        device_setter = utils.local_device_setter(
            ps_device_type='gpu',
            worker_device=worker_device,
            ps_strategy=tf.contrib.training.GreedyLoadBalancingStrategy(
                params.num_gpus, tf.contrib.training.byte_size_load_fn))
        with tf.device(device_setter):
            if labels_all_tower is not None:
                loss, distance = loss_function(
                    images_tower, labels_all_tower[i], params, is_training)
                losses_all_tower.append(loss)
            else:
                _, distance = loss_function(
                    images_tower, None, params, is_training)
            distance_all_tower.append(distance)

    consolidation_device = '/cpu:0'
    with tf.device(consolidation_device):
        distance = tf.concat(distance_all_tower, 0)
        if mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {'distance': distance}
            return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

        loss = tf.reduce_mean(losses_all_tower, name='loss_mean')
        labels = tf.reshape(labels, [-1, 1])
        labels_reversal = tf.reshape(tf.subtract(
            1.0, labels), [-1, 1])  # labels_ = !labels;
        positive_distance = tf.reduce_mean(tf.multiply(labels, distance))
        negative_distance = tf.reduce_mean(
            tf.multiply(labels_reversal, distance))
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('positive_distance', positive_distance)
        tf.summary.scalar('negative_distance', negative_distance)

        distance_norm = _normlize_distance(distance)
        metric_ops = tf.metrics.auc(labels_reversal, distance_norm)
        tf.summary.scalar('auc', metric_ops[1])

        if mode == tf.estimator.ModeKeys.EVAL:
            sec_at_spe_metric = tf.metrics.sensitivity_at_specificity(
                labels_reversal, distance_norm, 0.90)
            eval_metric_ops = {'evaluation_auc': metric_ops,
                               'sec_at_spe': sec_at_spe_metric}

            return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=eval_metric_ops)

        else:

            logging_hook = tf.train.LoggingTensorHook({"positive_distance": positive_distance,
                                                       "negative_distance": negative_distance,
                                                       "auc": metric_ops[1]}, every_n_iter=100)

            # optimizer = tf.train.RMSPropOptimizer(params.learning_rate)
            optimizer = tf.train.AdamOptimizer(params.learning_rate)
            global_step = tf.train.get_global_step()
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                train_op = optimizer.minimize(
                    loss, global_step=global_step, colocate_gradients_with_ops=True)

            return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op, training_hooks=[logging_hook])
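The label handling in this example is easy to misread: the labels are flipped (labels_reversal = 1 - labels) before being passed to tf.metrics.auc, because the normalized distance is used as the score and, presumably, label 1.0 marks a matching pair that should have a small distance. A toy numpy check of that convention (the numbers are invented):

# Toy numpy check of the label/distance convention used above.
import numpy as np

labels = np.array([1., 1., 0., 0.])            # 1.0 presumably = matching pair
distance = np.array([0.1, 0.2, 0.8, 0.9])      # per-example embedding distances
labels_reversal = 1.0 - labels                 # 1.0 now marks non-matching pairs

positive_distance = np.mean(labels * distance)            # 0.075, should stay small
negative_distance = np.mean(labels_reversal * distance)   # 0.425, should stay large

# tf.metrics.auc(labels_reversal, distance_norm) then treats the normalized
# distance as the score for the "non-matching" class, so a well-separated
# model gets an AUC close to 1.0.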
Code example #6
File: main.py  Project: xwcbigboy/LUPA-SGD
    def _linearregression_model_fn_sync(features, labels, mode, params):
        """Resnet model body.

    Support single host, one or more GPU training. Parameter distribution can
    be either one of the following scheme.
    1. CPU is the parameter server and manages gradient updates.
    2. Parameters are distributed evenly across all GPUs, and the first GPU
       manages gradient updates.

    Args:
      features: a list of tensors, one for each tower
      labels: a list of tensors, one for each tower
      mode: ModeKeys.TRAIN or EVAL
      params: Hyperparameters suitable for tuning
    Returns:
      A EstimatorSpec object.
    """
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        weight_decay = params.weight_decay

        features = features[0:num_gpus]
        labels = labels[0:num_gpus]
        tower_features = features
        tower_labels = labels
        tower_losses = []
        tower_gradvars = []
        tower_preds = []

        if num_gpus == 0:
            num_devices = 1
            device_type = 'cpu'
        else:
            num_devices = num_gpus
            device_type = 'gpu'

        for i in range(num_devices):
            worker_device = '/{}:{}'.format(device_type, i)
            if variable_strategy == 'CPU':
                device_setter = utils.local_device_setter(
                    worker_device=worker_device)
            elif variable_strategy == 'GPU':
                device_setter = utils.local_device_setter(
                    ps_device_type='gpu',
                    worker_device=worker_device,
                    ps_strategy=tf.contrib.training.
                    GreedyLoadBalancingStrategy(
                        num_gpus, tf.contrib.training.byte_size_load_fn))
            with tf.variable_scope('LinearRegression',
                                   reuse=bool(i != 0)) as var_scope:
                with tf.name_scope('tower_%d' % i) as name_scope:
                    with tf.device(device_setter):
                        loss, gradvars, preds = _tower_fn(
                            is_training, weight_decay, tower_features[i],
                            tower_labels[i], params.feature_dim,
                            var_scope.name, params.problem)
                        tower_losses.append(loss)
                        tower_gradvars.append(gradvars)
                        tower_preds.append(preds)

        # Now compute global loss and gradients.
        gradvars = []
        with tf.name_scope('gradient_averaging'):
            all_grads = {}
            for grad, var in itertools.chain(*tower_gradvars):
                if grad is not None:
                    all_grads.setdefault(var, []).append(grad)
            for var, grads in six.iteritems(all_grads):
                # Average gradients on the same device as the variables
                # to which they apply.
                with tf.device(var.device):
                    if len(grads) == 1:
                        avg_grad = grads[0]
                    else:
                        avg_grad = tf.multiply(tf.add_n(grads),
                                               1. / len(grads))
                gradvars.append((avg_grad, var))

        # Device that runs the ops to apply global gradient updates.
        consolidation_device = '/gpu:0' if variable_strategy == 'GPU' else '/cpu:0'
        with tf.device(consolidation_device):
            loss = tf.reduce_mean(tower_losses, name='loss')

            examples_sec_hook = utils.ExamplesPerSecondHook(
                params.train_batch_size, every_n_steps=100)

            tensors_to_log = {'loss': loss}

            logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                                      every_n_iter=100)

            train_hooks = [logging_hook, examples_sec_hook]

            # optimizer = tf.train.GradientDescentOptimizer(learning_rate=params.learning_rate)
            optimizer = tf.train.AdamOptimizer(
                learning_rate=params.learning_rate)

            if params.run_type == 'sync':
                optimizer = tf.train.SyncReplicasOptimizer(
                    optimizer, replicas_to_aggregate=num_workers)
                sync_replicas_hook = optimizer.make_session_run_hook(
                    params.is_chief)
                train_hooks.append(sync_replicas_hook)

            # Create single grouped train op
            train_op = [
                optimizer.apply_gradients(
                    gradvars, global_step=tf.train.get_global_step())
            ]

            train_op = tf.group(*train_op)

        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op,
                                          training_hooks=train_hooks)
Code example #7
File: main.py  Project: xwcbigboy/LUPA-SGD
    def _linearregression_model_fn_local(features, labels, mode, params):
        """

    Args:
      features: a list of tensors, one for each tower
      labels: a list of tensors, one for each tower
      mode: ModeKeys.TRAIN or EVAL
      params: Hyperparameters suitable for tuning
    Returns:
      A EstimatorSpec object.
    """
        is_training = (mode == tf.estimator.ModeKeys.TRAIN)
        weight_decay = params.weight_decay

        # features = features[0:num_gpus]
        # labels = labels[0:num_gpus]
        tower_features = features
        tower_labels = labels
        tower_losses = []
        tower_ops = []
        tower_preds = []
        var_scopes = []

        if num_gpus == 0:
            num_devices = 1
            device_type = 'cpu'
        else:
            num_devices = num_gpus
            device_type = 'gpu'

        for i in range(num_devices):
            worker_device = '/{}:{}'.format(device_type, i)
            if variable_strategy == 'CPU':
                device_setter = utils.local_device_setter(
                    worker_device=worker_device)
                # device_setter = tf.train.replica_device_setter(
                #     worker_device=worker_device)
            elif variable_strategy == 'GPU':
                device_setter = utils.local_device_setter(
                    ps_device_type='gpu',
                    worker_device=worker_device,
                    ps_strategy=tf.contrib.training.
                    GreedyLoadBalancingStrategy(
                        num_gpus, tf.contrib.training.byte_size_load_fn))
                # device_setter = tf.train.replica_device_setter(
                #     ps_device=worker_device,
                #     worker_device=worker_device
                # )
            with tf.variable_scope(
                    'LinearRegression_{}'.format(i)) as var_scope:
                with tf.name_scope('tower_%d' % i) as name_scope:
                    with tf.device(device_setter):
                        loss, gradvars, preds = _tower_fn(
                            is_training, weight_decay, tower_features[i],
                            tower_labels[i], params.feature_dim,
                            var_scope.name, params.problem)
                        var_scopes.append(var_scope.name)

                        tower_losses.append(loss)
                        # tower_gradvars.append(gradvars)
                        tower_preds.append(preds)

                        global_step = tf.cast(tf.train.get_global_step(),
                                              tf.float32)
                        lr = params.learning_rate
                        # optimizer = tf.train.GradientDescentOptimizer(learning_rate=params.learning_rate)
                        optimizer = tf.train.AdamOptimizer(learning_rate=lr)
                        # optimizer = tf.train.MomentumOptimizer(learning_rate=params.learning_rate,momentum=0.97)

                        # Create single grouped train op
                        train_op = [
                            optimizer.apply_gradients(
                                gradvars,
                                global_step=tf.train.get_global_step(),
                                name='apply_gradient_tower_{}'.format(i))
                        ]
                        tower_ops.append(train_op)

        # Device that runs the ops to apply global gradient updates.
        consolidation_device = '/gpu:0' if variable_strategy == 'GPU' else '/cpu:0'
        with tf.device(consolidation_device):

            examples_sec_hook = utils.ExamplesPerSecondHook(
                params.train_batch_size * (1 + params.redundancy),
                every_n_steps=100)
            loss = tf.reduce_mean(tower_losses, name='loss')
            tensors_to_log = {'loss': loss}
            logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                                      every_n_iter=100)
            train_hooks = [logging_hook, examples_sec_hook]
            if params.run_type == 'multi':
                if params.adaptive:
                    alpha = 2 / (params.num_comm +
                                 1) * (params.train_steps /
                                       (params.num_comm * params.sync_step))
                    local_updates = [
                        params.sync_step * (1 + alpha * i)
                        for i in range(params.num_comm + 1)
                    ]
                    sync_hook = utils.SyncHook(scopes=var_scopes,
                                               every_n_steps=params.sync_step,
                                               adaptive=local_updates)
                else:
                    sync_hook = utils.SyncHook(scopes=var_scopes,
                                               every_n_steps=params.sync_step)
                train_hooks.append(sync_hook)

            train_ops = tf.group(*tower_ops)

        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_ops,
                                          training_hooks=train_hooks)
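Finally, code example #7 never averages gradients across towers: each tower keeps its own 'LinearRegression_{i}' variables, and utils.SyncHook presumably reconciles them every sync_step steps, a local-SGD style setup. With params.adaptive the interval between synchronizations grows over training. A quick numerical check of that schedule with invented hyperparameters:

# Numerical illustration of the adaptive communication schedule above;
# the hyperparameter values are invented for this example.
train_steps, num_comm, sync_step = 10000, 4, 100

alpha = 2 / (num_comm + 1) * (train_steps / (num_comm * sync_step))
local_updates = [sync_step * (1 + alpha * i) for i in range(num_comm + 1)]

print(alpha)          # 10.0
print(local_updates)  # [100.0, 1100.0, 2100.0, 3100.0, 4100.0]
# The sync intervals grow linearly and sum to roughly train_steps (10,500 here),
# so the replicas synchronize more frequently early in training.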