def get_variables(scope):
    return tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope)
    def init_learner_state(self):
        learner_init_op = tf.initialize_variables(
            self.learner.learner.get_variables(tf.GraphKeys.GLOBAL_VARIABLES))
        local_inits = tf.get_collection(tf.GraphKeys.LOCAL_INIT_OP)
        with tf.control_dependencies(local_inits + [learner_init_op]):
            return self.learner.assign_state(self.learner.initial_state())
Example #3
def get_ckpt_var_map(ckpt_path, ckpt_scope, var_scope, skip_mismatch=None):
    """Get a var map for restoring from pretrained checkpoints.

  Args:
    ckpt_path: string. A pretrained checkpoint path.
    ckpt_scope: string. Scope name for checkpoint variables.
    var_scope: string. Scope name for model variables.
    skip_mismatch: if True, skip variables that are missing from the
      checkpoint or whose shapes mismatch, instead of raising an error.

  Returns:
    var_map: a dictionary from checkpoint name to model variables.
  """
    logging.info('Init model from checkpoint {}'.format(ckpt_path))
    if not ckpt_scope.endswith('/') or not var_scope.endswith('/'):
        raise ValueError('Please specify scope names ending with /')
    if ckpt_scope.startswith('/'):
        ckpt_scope = ckpt_scope[1:]
    if var_scope.startswith('/'):
        var_scope = var_scope[1:]

    var_map = {}
    # Get the list of vars to restore.
    model_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                   scope=var_scope)
    reader = tf.train.load_checkpoint(ckpt_path)
    ckpt_var_name_to_shape = reader.get_variable_to_shape_map()
    ckpt_var_names = set(ckpt_var_name_to_shape.keys())

    for i, v in enumerate(model_vars):
        if not v.op.name.startswith(var_scope):
            logging.info('skip {} -- does not match scope {}'.format(
                v.op.name, var_scope))
            continue
        ckpt_var = ckpt_scope + v.op.name[len(var_scope):]
        ema_suffix = '/ExponentialMovingAverage'
        if ckpt_var not in ckpt_var_names and v.op.name.endswith(ema_suffix):
            ckpt_var = ckpt_scope + v.op.name[:-len(ema_suffix)]

        if ckpt_var not in ckpt_var_names:
            if 'Momentum' in ckpt_var or 'RMSProp' in ckpt_var:
                # Skip optimizer variables.
                continue
            if skip_mismatch:
                logging.info('skip {} ({}) -- not in ckpt'.format(
                    v.op.name, ckpt_var))
                continue
            raise ValueError('{} is not in ckpt {}'.format(v.op, ckpt_path))

        if v.shape != ckpt_var_name_to_shape[ckpt_var]:
            if skip_mismatch:
                logging.info('skip {} ({} vs {}) -- shape mismatch'.format(
                    v.op.name, v.shape, ckpt_var_name_to_shape[ckpt_var]))
                continue
            raise ValueError('shape mismatch {} ({} vs {})'.format(
                v.op.name, v.shape, ckpt_var_name_to_shape[ckpt_var]))

        if i < 5:
            # Log the first few elements for sanity check.
            logging.info('Init {} from ckpt var {}'.format(
                v.op.name, ckpt_var))
        var_map[ckpt_var] = v

    return var_map
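
The returned dictionary maps checkpoint variable names to model variables, so it can be fed to tf.train.init_from_checkpoint or used as the var_list of a tf.train.Saver. A minimal usage sketch follows; the checkpoint path and scope names are placeholders, not values from the original project.

import tensorflow as tf  # TF 1.x graph-mode API, as in the snippets above

# Hypothetical call; adjust the path and scopes for your own model.
var_map = get_ckpt_var_map(
    ckpt_path='/tmp/pretrained/model.ckpt',
    ckpt_scope='efficientnet/',
    var_scope='efficientnet/',
    skip_mismatch=True)

# Either seed the variable initializers from the checkpoint ...
tf.train.init_from_checkpoint('/tmp/pretrained/model.ckpt', var_map)

# ... or restore explicitly through a Saver keyed by checkpoint names.
saver = tf.train.Saver(var_list=var_map)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, '/tmp/pretrained/model.ckpt')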
Example #4
def gradients(ys, xs, grad_ys=None, checkpoints="collection", **kwargs):
    """Recompute gradients.

  Authors: Tim Salimans & Yaroslav Bulatov
  Modified by: Nikolay Zakirov

  memory efficient gradient implementation inspired by
  "Training Deep Nets with Sublinear Memory Cost"
  by Chen et al. 2016 (https://arxiv.org/abs/1604.06174)

  ys,xs,grad_ys,kwargs are the arguments to standard tensorflow tf.gradients
  (https://www.tensorflow.org/versions/r0.12/api_docs/python/train.html#gradients)

  'checkpoints' can either be
      - a list consisting of tensors from the forward pass of the neural net
        that we should re-use when calculating the gradients in the
        backward pass; all other tensors that do not appear in this list
        will be re-computed
      - a string or list specifying how this list should be determined.
        Currently we support:
          - 'speed': checkpoint all outputs of convolutions and matmuls.
            These ops are usually the most expensive, so checkpointing
            them maximizes the running speed (this is a good option if
            nonlinearities, concats, batchnorms, etc. are taking up a
            lot of memory).
          - 'memory': try to minimize the memory usage (currently using
            a very simple strategy that identifies a number of
            bottleneck tensors in the graph to checkpoint).
          - 'collection': look for a tensorflow collection named
            'checkpoints', which holds the tensors to checkpoint.
          - a list: a list of strings to be matched against the names
            of the tensors; matching tensors are checkpointed.
  """
    #    print("Calling memsaving gradients with", checkpoints)
    if not isinstance(ys, list):
        ys = [ys]
    if not isinstance(xs, list):
        xs = [xs]

    bwd_ops = ge.get_backward_walk_ops([y.op for y in ys], inclusive=True)

    logging.debug("bwd_ops: %s", len(bwd_ops))

    # forward ops are all ops that are candidates for recomputation
    fwd_ops = ge.get_forward_walk_ops([x.op for x in xs],
                                      inclusive=True,
                                      within_ops=bwd_ops)
    logging.debug("fwd_ops: %s", len(fwd_ops))

    # exclude ops with no inputs
    fwd_ops = [op for op in fwd_ops if op.inputs]

    logging.debug("fwd_ops: %s", len(fwd_ops))
    # don't recompute xs, remove variables
    xs_ops = _to_ops(xs)
    fwd_ops = [op for op in fwd_ops if op not in xs_ops]
    fwd_ops = [op for op in fwd_ops if "/assign" not in op.name]
    fwd_ops = [op for op in fwd_ops if "/Assign" not in op.name]
    fwd_ops = [op for op in fwd_ops if "/read" not in op.name]
    logging.debug("fwd_ops: %s", len(fwd_ops))
    ts_all = ge.filter_ts(fwd_ops, True)  # get the tensors
    logging.debug("ts_all: %s", len(ts_all))

    ts_all = [t for t in ts_all if "/read" not in t.name]
    ts_all = set(ts_all) - set(xs) - set(ys)
    logging.debug("ts_all: %s", len(ts_all))

    # construct list of tensors to checkpoint during forward pass, if not
    # given as input
    if not isinstance(checkpoints, list):
        if checkpoints == "collection":
            checkpoints = tf.get_collection("checkpoints")

        elif checkpoints == "speed":
            # checkpoint all expensive ops to maximize running speed
            checkpoints = ge.filter_ts_from_regex(fwd_ops,
                                                  "conv2d|Conv|MatMul")

        elif checkpoints == "memory":

            # remove very small tensors and some weird ops
            # tf.Dimension values are not compatible with int; convert manually.
            def fixdims(t):
                try:
                    return [
                        int(e if e is not None else 64) for e in t.as_list()
                    ]
                except AttributeError as e:
                    logging.exception("%s", e)
                    logging.exception("unknown shape %s", t)
                    return [0]  # unknown shape

            ts_all = [
                t for t in ts_all
                if np.prod(fixdims(t.shape)) > MIN_CHECKPOINT_NODE_SIZE
                # if (tf.size(t) > MIN_CHECKPOINT_NODE_SIZE)
            ]
            logging.debug("ts_all: %s", len(ts_all))
            ts_all = [t for t in ts_all if "L2Loss" not in t.name]
            ts_all = [t for t in ts_all if "entropy" not in t.name]
            ts_all = [t for t in ts_all if "FusedBatchNorm" not in t.name]
            ts_all = [t for t in ts_all if "Switch" not in t.name]
            ts_all = [t for t in ts_all if "dropout" not in t.name]
            # DV: FP16_FIX - need to add 'Cast' layer here to make it work for FP16
            ts_all = [t for t in ts_all if "Cast" not in t.name]
            logging.debug("ts_all: %s", len(ts_all))

            # filter out all tensors that are inputs of the backward graph
            with util.capture_ops() as bwd_ops:
                tf_gradients(ys, xs, grad_ys, **kwargs)

            bwd_inputs = [t for op in bwd_ops for t in op.inputs]
            # list of tensors in the forward graph that are inputs to the bwd graph
            ts_filtered = list(set(bwd_inputs).intersection(ts_all))
            debug_print("Using tensors %s", ts_filtered)

            # try two slightly different ways of getting bottlenecks tensors
            # to checkpoint
            logging.debug("len(ts_filtered): %s", len(ts_filtered))
            logging.debug("len(ts_all) %s", len(ts_all))
            for ts in [ts_filtered, ts_all]:

                # get all bottlenecks in the graph
                bottleneck_ts = []
                for t in ts:

                    b = set(
                        ge.get_backward_walk_ops(t.op,
                                                 inclusive=True,
                                                 within_ops=fwd_ops))
                    f = set(
                        ge.get_forward_walk_ops(t.op,
                                                inclusive=False,
                                                within_ops=fwd_ops))
                    # check that there are no shortcuts
                    b_inp = {inp
                             for op in b
                             for inp in op.inputs}.intersection(ts_all)
                    f_inp = {inp
                             for op in f
                             for inp in op.inputs}.intersection(ts_all)
                    if not set(b_inp).intersection(
                            f_inp) and len(b_inp) + len(f_inp) >= len(ts_all):
                        bottleneck_ts.append(t)  # we have a bottleneck!
                    else:
                        logging.debug(
                            "Rejected bottleneck candidate and ops %s %d", [t],
                            len(b_inp) + len(f_inp) - len(ts_all))

                # success? or try again without filtering?
                if len(bottleneck_ts) >= np.sqrt(
                        len(ts_filtered)):  # yes, enough bottlenecks found!
                    break
            # bottleneck_ts = [t for t in ts_all if 'Add' in t.name]
            # logging.debug("Add only ts_all: %s", len(bottleneck_ts))

            if not bottleneck_ts:
                raise Exception(
                    "unable to find bottleneck tensors! please provide checkpoint "
                    'nodes manually, or use checkpoints="speed" or a list of strings.'
                )
            logging.debug("len(bottleneck_ts): %s", len(bottleneck_ts))

            # sort the bottlenecks
            bottlenecks_sorted_lists = tf_toposort(bottleneck_ts,
                                                   within_ops=fwd_ops)
            sorted_bottlenecks = [
                t for ts in bottlenecks_sorted_lists for t in ts
            ]

            # save an approximately optimal number ~ sqrt(N)
            n_filtered = len(ts_filtered)
            if len(bottleneck_ts) <= np.ceil(np.sqrt(n_filtered)):
                checkpoints = sorted_bottlenecks
            else:
                step = int(np.ceil(len(bottleneck_ts) / np.sqrt(n_filtered)))
                checkpoints = sorted_bottlenecks[step::step]

        else:
            raise Exception('%s is unsupported input for "checkpoints"' %
                            (checkpoints, ))
    else:
        # exclude some layers as was done in the original bottleneck searching
        # algorithm
        for excl_layer in [
                "L2Loss", "entropy", "FusedBatchNorm", "Switch", "dropout",
                "Cast"
        ]:
            ts_all = [t for t in ts_all if excl_layer not in t.name]
            logging.info("Excluding %s from ts_all: %d", excl_layer,
                         len(ts_all))

        # leave only layers that match strings in checkpoints list
        ts_all = [
            t for t in ts_all
            if any(partial_match in t.name for partial_match in checkpoints)
        ]
        logging.info("Leaving only %s in ts_all: %d", checkpoints, len(ts_all))
        checkpoints = ts_all.copy()

    checkpoints = list(set(checkpoints).intersection(ts_all))

    # at this point selection happened and checkpoints is list of nodes
    # assert isinstance(checkpoints, list)

    # TODO(nikzak): implement multithreading in graph recomputation
    logging.info("Checkpoint nodes used: %s", len(checkpoints))
    # better error handling of special cases
    # xs are already handled as checkpoint nodes, so no need to include them
    xs_intersect_checkpoints = set(xs).intersection(set(checkpoints))
    if xs_intersect_checkpoints:
        debug_print("Warning, some input nodes are also checkpoint nodes: %s",
                    xs_intersect_checkpoints)
    ys_intersect_checkpoints = set(ys).intersection(set(checkpoints))
    debug_print("ys: %s, checkpoints: %s, intersect: %s", ys, checkpoints,
                ys_intersect_checkpoints)
    # saving an output node (ys) gives no benefit in memory while creating
    # new edge cases, exclude them
    if ys_intersect_checkpoints:
        debug_print(
            "Warning, some output nodes are also checkpoints nodes: %s",
            format_ops(ys_intersect_checkpoints))

    # remove initial and terminal nodes from checkpoints list if present

    checkpoints = list(set(checkpoints) - set(ys) - set(xs))
    logging.info("Pruned initial and terminal nodes. Leaving %d",
                 len(checkpoints))

    # check that we have some nodes to checkpoint
    if not checkpoints:
        raise Exception("no checkpoints nodes found or given as input! ")

    # disconnect dependencies between checkpointed tensors
    checkpoints_disconnected = {}
    for x in checkpoints:
        if x.op and x.op.name is not None:
            grad_node = tf.stop_gradient(x, name=x.op.name + "_sg")
        else:
            grad_node = tf.stop_gradient(x)
        grad_node.op._set_device(x.op.node_def.device)
        checkpoints_disconnected[x] = grad_node

    # partial derivatives to the checkpointed tensors and xs
    ops_to_copy = fast_backward_ops(seed_ops=[y.op for y in ys],
                                    stop_at_ts=checkpoints,
                                    within_ops=fwd_ops)
    debug_print("Found %s ops to copy within fwd_ops %s, seed %s, stop_at %s",
                len(ops_to_copy), fwd_ops, [r.op for r in ys], checkpoints)
    debug_print("ops_to_copy = %s", ops_to_copy)
    debug_print("Processing list %s", ys)
    _, info = ge.copy_with_input_replacements(ge.sgv(ops_to_copy), {})
    for origin_op, op in info._transformed_ops.items():
        op._set_device(origin_op.node_def.device)
    copied_ops = info._transformed_ops.values()
    debug_print("Copied %s to %s", ops_to_copy, copied_ops)
    ge.reroute_ts(checkpoints_disconnected.values(),
                  checkpoints_disconnected.keys(),
                  can_modify=copied_ops)
    debug_print("Rewired %s in place of %s restricted to %s",
                checkpoints_disconnected.values(),
                checkpoints_disconnected.keys(), copied_ops)

    # get gradients with respect to current boundary + original x's
    copied_ys = [info._transformed_ops[y.op]._outputs[0] for y in ys]
    boundary = list(checkpoints_disconnected.values())
    dv = tf_gradients(ys=copied_ys,
                      xs=boundary + xs,
                      grad_ys=grad_ys,
                      **kwargs)
    debug_print("Got gradients %s", dv)
    debug_print("for %s", copied_ys)
    debug_print("with respect to %s", boundary + xs)

    inputs_to_do_before = [y.op for y in ys]
    if grad_ys is not None:
        inputs_to_do_before += grad_ys
    wait_to_do_ops = list(copied_ops) + [g.op for g in dv if g is not None]
    my_add_control_inputs(wait_to_do_ops, inputs_to_do_before)

    # partial derivatives to the checkpointed nodes
    # dictionary of "node: backprop" for nodes in the boundary
    d_checkpoints = dict(
        zip(checkpoints_disconnected.keys(),
            dv[:len(checkpoints_disconnected)]))

    # partial derivatives to xs (usually the params of the neural net)
    d_xs = dv[len(checkpoints_disconnected):]

    # incorporate derivatives flowing through the checkpointed nodes
    logging.info("Sorting nodes topologically")
    checkpoints_sorted_lists = tf_toposort(checkpoints, within_ops=fwd_ops)
    logging.info("Rebuilding graph with %d checkpoints",
                 len(checkpoints_sorted_lists))
    for index, ts in enumerate(checkpoints_sorted_lists[::-1]):
        if index % 50 == 0:
            logging.info("Processed %d nodes", index)
        debug_print("Processing list %s", ts)
        checkpoints_other = [r for r in checkpoints if r not in ts]
        checkpoints_disconnected_other = [
            checkpoints_disconnected[r] for r in checkpoints_other
        ]

        # copy part of the graph below current checkpoint node, stopping at
        # other checkpoints nodes
        ops_to_copy = fast_backward_ops(within_ops=fwd_ops,
                                        seed_ops=[r.op for r in ts],
                                        stop_at_ts=checkpoints_other)
        debug_print("Found %s ops to copy within %s, seed %s, stop_at %s",
                    len(ops_to_copy), fwd_ops, [r.op for r in ts],
                    checkpoints_other)
        debug_print("ops_to_copy = %s", ops_to_copy)
        if not ops_to_copy:  # we're done!
            break
        _, info = ge.copy_with_input_replacements(ge.sgv(ops_to_copy), {})
        for origin_op, op in info._transformed_ops.items():
            op._set_device(origin_op.node_def.device)
        copied_ops = info._transformed_ops.values()
        debug_print("Copied %s to %s", ops_to_copy, copied_ops)
        ge.reroute_ts(checkpoints_disconnected_other,
                      checkpoints_other,
                      can_modify=copied_ops)
        debug_print("Rewired %s in place of %s restricted to %s",
                    checkpoints_disconnected_other, checkpoints_other,
                    copied_ops)

        # gradient flowing through the checkpointed node
        boundary = [info._transformed_ops[r.op]._outputs[0] for r in ts]
        substitute_backprops = [d_checkpoints[r] for r in ts]
        dv = tf_gradients(boundary,
                          checkpoints_disconnected_other + xs,
                          grad_ys=substitute_backprops,
                          **kwargs)
        debug_print("Got gradients %s", dv)
        debug_print("for %s", boundary)
        debug_print("with respect to %s", checkpoints_disconnected_other + xs)
        debug_print("with boundary backprop substitutions %s",
                    substitute_backprops)

        inputs_to_do_before = [d_checkpoints[r].op for r in ts]
        wait_to_do_ops = list(copied_ops) + [g.op for g in dv if g is not None]
        my_add_control_inputs(wait_to_do_ops, inputs_to_do_before)

        # partial derivatives to the checkpointed nodes
        for r, dr in zip(checkpoints_other, dv[:len(checkpoints_other)]):
            if dr is not None:
                if d_checkpoints[r] is None:
                    d_checkpoints[r] = dr
                else:
                    d_checkpoints[r] += dr

        def _unsparsify(x):
            if not isinstance(x, tf.IndexedSlices):
                return x
            if x.dense_shape is None:
                raise ValueError(
                    "memory_saving_gradients has sparse gradients of unknown shape."
                )

            indices = x.indices
            while indices.shape.ndims < x.values.shape.ndims:
                indices = tf.expand_dims(indices, -1)
            return tf.scatter_nd(indices, x.values, x.dense_shape)

        # partial derivatives to xs (usually the params of the neural net)
        d_xs_new = dv[len(checkpoints_other):]
        for j in range(len(xs)):
            if d_xs_new[j] is not None:
                if d_xs[j] is None:
                    d_xs[j] = _unsparsify(d_xs_new[j])
                else:
                    d_xs[j] += _unsparsify(d_xs_new[j])

    return d_xs
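
A hedged usage sketch for the function above, assuming TF 1.x graph mode and a scalar loss built elsewhere; the optimizer choice is illustrative. Variants of this recipe also monkey-patch tf.gradients so existing training code picks up the memory-saving path transparently; that step is not shown here.

# Illustrative wiring only: `loss` comes from a model defined elsewhere.
params = tf.trainable_variables()
grads = gradients(loss, params, checkpoints='memory')

optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
train_op = optimizer.apply_gradients(list(zip(grads, params)))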
Example #5
    def vars(self):
        return tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                 scope=self.name_scope)
Example #6
    def train(self,
              input_fn,
              checkpoint_path=None,
              save_checkpoint_steps=None):
        if self._cluster_spec is not None:
            device_fn = tf.train.replica_device_setter(
                worker_device="/job:worker/task:%d" % self._worker_rank,
                merge_devices=True,
                cluster=self._cluster_spec)
            cluster_def = self._cluster_spec.as_cluster_def()
            local_address = self._cluster_spec.job_tasks('worker')[
                self._worker_rank]
            server = tf.train.Server(tf.train.ClusterSpec(
                {'local': {
                    0: local_address
                }}),
                                     job_name='local',
                                     task_index=0)
            target = 'grpc://' + local_address
        else:
            device_fn = None
            cluster_def = None
            target = None

        config = tf.ConfigProto(cluster_def=cluster_def)
        config.inter_op_parallelism_threads = 4
        config.intra_op_parallelism_threads = 4
        config.experimental.share_session_state_in_clusterspec_propagation \
            = True
        tf.config.set_soft_device_placement(False)

        with tf.Graph().as_default() as g:
            with tf.device(device_fn):
                features, labels = self._get_features_and_labels_from_input_fn(
                    input_fn, ModeKeys.TRAIN)
                spec, _ = self._get_model_spec(features, labels,
                                               ModeKeys.TRAIN)

            # Explicitly add a Saver
            if not tf.get_collection(tf.GraphKeys.SAVERS):
                saver = tf.train.Saver(
                    sharded=True, defer_build=True,
                    save_relative_paths=True)  # Must set for portability
                tf.add_to_collection(tf.GraphKeys.SAVERS, saver)

            self._bridge.connect()
            with tf.train.MonitoredTrainingSession(
                    master=target,
                    config=config,
                    is_chief=(self._worker_rank == 0),
                    checkpoint_dir=checkpoint_path,
                    save_checkpoint_steps=save_checkpoint_steps,
                    hooks=spec.training_hooks) as sess:
                iter_id = 0
                while not sess.should_stop():
                    self._bridge.start(iter_id)
                    logging.debug('after bridge start.')
                    sess.run(spec.train_op, feed_dict={})
                    logging.debug('after session run.')
                    self._bridge.commit()
                    logging.debug('after bridge commit.')
                    iter_id += 1

            if self._cluster_spec is not None:
                self._cheif_barriar(is_chief=(self._worker_rank == 0))
            self._bridge.terminate()

        return self
Example #7
def train():
    with tf.Graph().as_default():
        with tf.device('/gpu:' + str(GPU_INDEX)):
            pointclouds_pl, labels_pl = MODEL.placeholder_inputs(
                BATCH_SIZE, NUM_POINT)
            is_training_pl = tf.placeholder(tf.bool, shape=())

            # Note the global_step=batch parameter to minimize.
            # That tells the optimizer to helpfully increment the 'batch' parameter
            # for you every time it trains.
            batch = tf.get_variable('batch', [],
                                    initializer=tf.constant_initializer(0),
                                    trainable=False)
            bn_decay = get_bn_decay(batch)
            tf.summary.scalar('bn_decay', bn_decay)

            # Get model and loss
            pred, end_points = MODEL.get_model(pointclouds_pl,
                                               is_training_pl,
                                               bn_decay=bn_decay)
            MODEL.get_loss(pred, labels_pl, end_points)
            losses = tf.get_collection('losses')
            total_loss = tf.add_n(losses, name='total_loss')
            tf.summary.scalar('total_loss', total_loss)
            for l in losses + [total_loss]:
                tf.summary.scalar(l.op.name, l)

            correct = tf.equal(tf.argmax(pred, 1), tf.to_int64(labels_pl))
            accuracy = tf.reduce_sum(tf.cast(correct,
                                             tf.float32)) / float(BATCH_SIZE)
            tf.summary.scalar('accuracy', accuracy)

            print "--- Get training operator"
            # Get training operator
            learning_rate = get_learning_rate(batch)
            tf.summary.scalar('learning_rate', learning_rate)
            if OPTIMIZER == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                       momentum=MOMENTUM)
            elif OPTIMIZER == 'adam':
                optimizer = tf.train.AdamOptimizer(learning_rate)
            train_op = optimizer.minimize(total_loss, global_step=batch)

            # Add ops to save and restore all the variables.
            saver = tf.train.Saver()

        # Create a session
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        sess = tf.Session(config=config)

        # Add summary writers
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'train'),
                                             sess.graph)
        test_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, 'test'),
                                            sess.graph)

        # Init variables
        init = tf.global_variables_initializer()
        sess.run(init)

        ops = {
            'pointclouds_pl': pointclouds_pl,
            'labels_pl': labels_pl,
            'is_training_pl': is_training_pl,
            'pred': pred,
            'loss': total_loss,
            'train_op': train_op,
            'merged': merged,
            'step': batch,
            'end_points': end_points
        }

        best_acc = -1
        for epoch in range(MAX_EPOCH):
            log_string('**** EPOCH %03d ****' % (epoch))
            sys.stdout.flush()

            train_one_epoch(sess, ops, train_writer)
            eval_one_epoch(sess, ops, test_writer)

            # Save the variables to disk.
            if epoch % 10 == 0:
                save_path = saver.save(sess,
                                       os.path.join(LOG_DIR, "model.ckpt"))
                log_string("Model saved in file: %s" % save_path)
Example #8
def get_weights():
    return tf.get_collection(_WEIGHT_COLLECTION)
Example #9
def get_scalar_summaries():
    """Returns the list of (name, Tensor) summaries recorded by scalar()."""
    return tf.get_collection('edsummaries')
Example #10
def get_masks():
    return tf.get_collection(_MASK_COLLECTION)
Example #11
def get_thresholds():
    return tf.get_collection(_THRESHOLD_COLLECTION)
Example #12
def get_masked_weights():
    return tf.get_collection(_MASKED_WEIGHT_COLLECTION)
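
These getters only read from named graph collections; the registration side must add the matching tensors when layers are built. A minimal sketch of that pattern, with hypothetical collection names and a hypothetical helper (not taken from the original library):

# Hypothetical constants; the real library defines its own collection names.
_WEIGHT_COLLECTION = 'weights'
_MASK_COLLECTION = 'masks'
_MASKED_WEIGHT_COLLECTION = 'masked_weights'


def _register_masked_weight(weight, mask):
    # Record the weight, its mask, and their product so the get_*() helpers
    # above can retrieve them later, e.g. for sparsity summaries.
    masked_weight = tf.multiply(weight, mask, name='masked_weight')
    tf.add_to_collection(_WEIGHT_COLLECTION, weight)
    tf.add_to_collection(_MASK_COLLECTION, mask)
    tf.add_to_collection(_MASKED_WEIGHT_COLLECTION, masked_weight)
    return masked_weight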
    def __init__(self, args):
        # inputs/mask.shape=(128, None)  'None' in shape means any number  seq_length.shape=(128,)
        inputs = tf.placeholder(shape=(args.batch_size, None),
                                dtype=tf.int32,
                                name='inputs')
        mask = tf.placeholder(shape=(args.batch_size, None),
                              dtype=tf.float32,
                              name='inputs_mask')
        seq_length = tf.placeholder(shape=args.batch_size,
                                    dtype=tf.float32,
                                    name='seq_length')

        self.input_form = [inputs, mask, seq_length]

        # all shape=(128, None)
        encoder_inputs = inputs
        decoder_inputs = tf.concat(
            [tf.zeros(shape=(args.batch_size, 1), dtype=tf.int32), inputs],
            axis=1)
        decoder_targets = tf.concat(
            [inputs,
             tf.zeros(shape=(args.batch_size, 1), dtype=tf.int32)],
            axis=1)
        decoder_mask = tf.concat(
            [mask,
             tf.zeros(shape=(args.batch_size, 1), dtype=tf.float32)],
            axis=1)

        # map size
        x_size = out_size = args.map_size[0] * args.map_size[1]
        # embeddings.shape=(16900, 32)  tf.random_uniform(shape, minval=0, maxval=None, ...)
        # x_latent_size is the input embedding size = 32
        embeddings = tf.Variable(tf.random_uniform(
            [x_size, args.x_latent_size], -1.0, 1.0),
                                 dtype=tf.float32)
        # tf.nn.embedding_lookup(params, ids, ...)  Looks up ids in a list of embedding tensors.
        # shape=(128, None, 32)
        encoder_inputs_embedded = tf.nn.embedding_lookup(
            embeddings, encoder_inputs)
        decoder_inputs_embedded = tf.nn.embedding_lookup(
            embeddings, decoder_inputs)

        with tf.variable_scope("encoder"):
            # create a GRUCell  output_size = state_size = 256
            encoder_cell = tf.nn.rnn_cell.GRUCell(args.rnn_size)

            # tf.compat.v1.nn.dynamic_rnn(cell, inputs, ...) = keras.layers.RNN(cell)
            # returns (outputs, state)
            # 'outputs' is a tensor of shape [batch_size, max_time, cell_output_size]
            # 'state' is a tensor of shape [batch_size, cell_state_size] = (128, 256)
            _, encoder_final_state = tf.nn.dynamic_rnn(
                encoder_cell,
                encoder_inputs_embedded,
                sequence_length=seq_length,
                dtype=tf.float32,
            )

        # tf.compat.v1.get_variable(name, shape=None, dtype=None,
        #                           initializer=None, ...)
        mu_w = tf.get_variable("mu_w", [args.rnn_size, args.rnn_size],
                               tf.float32,
                               tf.random_normal_initializer(stddev=0.02))
        mu_b = tf.get_variable("mu_b", [args.rnn_size], tf.float32,
                               tf.constant_initializer(0.0))
        sigma_w = tf.get_variable("sigma_w", [args.rnn_size, args.rnn_size],
                                  tf.float32,
                                  tf.random_normal_initializer(stddev=0.02))
        sigma_b = tf.get_variable("sigma_b", [args.rnn_size], tf.float32,
                                  tf.constant_initializer(0.0))

        # all shape=(128, 256)
        mu = tf.matmul(encoder_final_state, mu_w) + mu_b
        log_sigma_sq = tf.matmul(encoder_final_state, sigma_w) + sigma_b
        eps = tf.random_normal(shape=tf.shape(log_sigma_sq),
                               mean=0,
                               stddev=1,
                               dtype=tf.float32)

        if args.eval:
            z = tf.zeros(shape=(args.batch_size, args.rnn_size),
                         dtype=tf.float32)
        else:
            # Re-parameterization trick
            z = mu + tf.sqrt(tf.exp(log_sigma_sq)) * eps

        self.batch_post_embedded = z

        with tf.variable_scope("decoder"):
            decoder_cell = tf.nn.rnn_cell.GRUCell(args.rnn_size)
            decoder_init_state = z
            decoder_outputs, _ = tf.nn.dynamic_rnn(
                decoder_cell,
                decoder_inputs_embedded,
                initial_state=decoder_init_state,
                sequence_length=seq_length,
                dtype=tf.float32,
            )

        # out_size = 16900
        out_w = tf.get_variable("out_w", [out_size, args.rnn_size], tf.float32,
                                tf.random_normal_initializer(stddev=0.02))
        out_b = tf.get_variable("out_b", [out_size], tf.float32,
                                tf.constant_initializer(0.0))
        # tf.reduce_mean(input_tensor, axis=None, ...)  Reduces input_tensor to mean value along the given axis.
        # tf.reshape(tensor, shape, name=None)  Reshape the tensor into given shape, -1 indicates calculated value.
        # tf.nn.sampled_softmax_loss()  A fast way to train softmax classifier, usually an underestimate (for training only).
        batch_rec_loss = tf.reduce_mean(
            decoder_mask * tf.reshape(
                tf.nn.sampled_softmax_loss(
                    weights=out_w,
                    biases=out_b,
                    labels=tf.reshape(decoder_targets, [-1, 1]),
                    inputs=tf.reshape(decoder_outputs, [-1, args.rnn_size]),
                    num_sampled=args.neg_size,
                    num_classes=out_size), [args.batch_size, -1]),
            axis=-1  # reduce to mean along the last dimension
        )
        batch_latent_loss = -0.5 * tf.reduce_sum(
            1 + log_sigma_sq - tf.square(mu) - tf.exp(log_sigma_sq), axis=1)

        self.rec_loss = rec_loss = tf.reduce_mean(batch_rec_loss)
        self.latent_loss = latent_loss = tf.reduce_mean(batch_latent_loss)

        self.loss = loss = tf.reduce_mean([rec_loss, latent_loss])
        self.train_op = tf.train.AdamOptimizer(
            args.learning_rate).minimize(loss)

        target_out_w = tf.nn.embedding_lookup(out_w, decoder_targets)
        target_out_b = tf.nn.embedding_lookup(out_b, decoder_targets)

        self.batch_likelihood = tf.reduce_mean(decoder_mask * tf.log_sigmoid(
            tf.reduce_sum(decoder_outputs * target_out_w, -1) + target_out_b),
                                               axis=-1,
                                               name="batch_likelihood")

        # save/restore variables to/from checkpoints, max_to_keep = max #recent checkpoint files to keep.
        saver = tf.train.Saver(tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES),
                               max_to_keep=10)
        self.save, self.restore = saver.save, saver.restore
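
A hedged sketch of driving this model, assuming the __init__ above belongs to a class named TrajVAE here (the class name and the random batch are illustrative) and that args provides batch_size, map_size, x_latent_size, rnn_size, neg_size, learning_rate and eval:

import numpy as np

model = TrajVAE(args)  # hypothetical class name wrapping the __init__ above
inputs_ph, mask_ph, seq_len_ph = model.input_form

seq_len = 20  # illustrative fixed length for the dummy batch
vocab_size = args.map_size[0] * args.map_size[1]
batch_inputs = np.random.randint(0, vocab_size, size=(args.batch_size, seq_len))
batch_mask = np.ones((args.batch_size, seq_len), dtype=np.float32)
batch_lengths = np.full((args.batch_size,), seq_len, dtype=np.float32)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, rec, lat = sess.run(
        [model.train_op, model.rec_loss, model.latent_loss],
        feed_dict={inputs_ph: batch_inputs,
                   mask_ph: batch_mask,
                   seq_len_ph: batch_lengths})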
def get_trainable_variables(scope):
    return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
    def predict_and_report(self,
                           sequences,
                           labels,
                           W_embed,
                           report=True,
                           file=False):
        """
        PURPOSE: Prediction using best model on provided examples and generating
                 report if indicated and labels are provided.

        ARGS:
        sequences      (list(list)) order of product numbers
        labels      (list) order class labels
        W_embed     (list(list)) trained word embedding Matrix
        report      (bool) indicator for whether a report is generated
        """
        from sklearn.metrics import confusion_matrix, classification_report
        import json

        with tf.Session(graph=self.graph) as sess:
            _, saver_ = tf.get_collection('Init_Save_ops')
            saver_.restore(sess, self.final_ckpt)
            logits_ = self.graph.get_tensor_by_name(
                'OutputLyr/Logits_lyr/BiasAdd:0')
            sequences_, W_embed_, Y_, training_ = tf.get_collection(
                "Input_var")
            self.logits_prediction = logits_.eval(feed_dict={
                W_embed_: W_embed,
                sequences_: sequences,
                training_: False
            })
            self.class_prediction = np.argmax(self.logits_prediction, axis=1)

            confusion_mat = confusion_matrix(labels, self.class_prediction)
            true_neg = confusion_mat[0, 0] / (confusion_mat[0, 0] +
                                              confusion_mat[1, 0])
            false_neg = confusion_mat[0, 1] / (confusion_mat[0, 1] +
                                               confusion_mat[1, 1])
            ratio = true_neg / false_neg

            if report:
                print('-----------{}-----------'.format('Confusion Matrix'))
                print(confusion_mat, '\n')
                print(
                    '-----------{}-----------'.format('Classification Report'))
                print(classification_report(labels, self.class_prediction))
                print('True Negative:', true_neg)
                print('False Negative:', false_neg)
                print('Upper Constraint:', ratio)
            if file:
                summary_dict = self.__dict__.copy()
                class_report_dict = classification_report(
                    labels, self.class_prediction, output_dict=True)
                summary_dict.update(class_report_dict)
                summary_dict.update({
                    'true_negative': true_neg,
                    'false_negative': false_neg,
                    'upper_constraint': ratio
                })
                summary_dict.pop('graph', None)
                summary_dict.pop('logits_prediction', None)
                summary_dict.pop('class_prediction', None)
                with open(self.summary_file, 'w') as file:
                    json.dump(summary_dict, file, indent=2)
                with open(self.most_recent_summary_file, 'w') as file:
                    json.dump(summary_dict, file, indent=2)
Example #16
def inception_model_fn(features, labels, mode, params):
    """Inception v3 model using Estimator API."""
    num_classes = FLAGS.num_classes
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    is_eval = (mode == tf.estimator.ModeKeys.EVAL)

    if isinstance(features, dict):
        features = features['feature']

    features = tensor_transform_fn(features, params['input_perm'])

    # This nested function allows us to avoid duplicating the logic which
    # builds the network, for different values of --precision.
    def build_network():
        if FLAGS.precision == 'bfloat16':
            with contrib_tpu.bfloat16_scope():
                logits, end_points = inception.inception_v3(
                    features, num_classes, is_training=is_training)
            logits = tf.cast(logits, tf.float32)
        elif FLAGS.precision == 'float32':
            logits, end_points = inception.inception_v3(
                features, num_classes, is_training=is_training)
        return logits, end_points

    if FLAGS.clear_update_collections:
        # updates_collections must be set to None in order to use fused batchnorm
        with arg_scope(
                inception.inception_v3_arg_scope(
                    weight_decay=0.0,
                    batch_norm_decay=BATCH_NORM_DECAY,
                    batch_norm_epsilon=BATCH_NORM_EPSILON,
                    updates_collections=None)):
            logits, end_points = build_network()
    else:
        with arg_scope(
                inception.inception_v3_arg_scope(
                    batch_norm_decay=BATCH_NORM_DECAY,
                    batch_norm_epsilon=BATCH_NORM_EPSILON)):
            logits, end_points = build_network()

    predictions = {
        'classes': tf.argmax(input=logits, axis=1),
        'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                'classify': tf.estimator.export.PredictOutput(predictions)
            })

    if mode == tf.estimator.ModeKeys.EVAL and FLAGS.display_tensors and (
            not FLAGS.use_tpu):
        with tf.control_dependencies([
                tf.Print(predictions['classes'], [predictions['classes']],
                         summarize=FLAGS.eval_batch_size,
                         message='prediction: ')
        ]):
            labels = tf.Print(labels, [labels],
                              summarize=FLAGS.eval_batch_size,
                              message='label: ')

    one_hot_labels = tf.one_hot(labels, FLAGS.num_classes, dtype=tf.int32)

    if 'AuxLogits' in end_points:
        tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                        logits=tf.cast(end_points['AuxLogits'],
                                                       tf.float32),
                                        weights=0.4,
                                        label_smoothing=0.1,
                                        scope='aux_loss')

    tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels,
                                    logits=logits,
                                    weights=1.0,
                                    label_smoothing=0.1)

    losses = tf.add_n(tf.losses.get_losses())
    l2_loss = []
    for v in tf.trainable_variables():
        if 'BatchNorm' not in v.name and 'weights' in v.name:
            l2_loss.append(tf.nn.l2_loss(v))
    loss = losses + WEIGHT_DECAY * tf.add_n(l2_loss)

    initial_learning_rate = FLAGS.learning_rate * FLAGS.train_batch_size / 256
    if FLAGS.use_learning_rate_warmup:
        # Adjust initial learning rate to match final warmup rate
        warmup_decay = FLAGS.learning_rate_decay**(
            (FLAGS.warmup_epochs + FLAGS.cold_epochs) /
            FLAGS.learning_rate_decay_epochs)
        adj_initial_learning_rate = initial_learning_rate * warmup_decay

    final_learning_rate = 0.0001 * initial_learning_rate

    host_call = None
    train_op = None
    if is_training:
        batches_per_epoch = _NUM_TRAIN_IMAGES / FLAGS.train_batch_size
        global_step = tf.train.get_or_create_global_step()
        current_epoch = tf.cast(
            (tf.cast(global_step, tf.float32) / batches_per_epoch), tf.int32)

        learning_rate = tf.train.exponential_decay(
            learning_rate=initial_learning_rate,
            global_step=global_step,
            decay_steps=int(FLAGS.learning_rate_decay_epochs *
                            batches_per_epoch),
            decay_rate=FLAGS.learning_rate_decay,
            staircase=True)

        if FLAGS.use_learning_rate_warmup:
            wlr = 0.1 * adj_initial_learning_rate
            wlr_height = tf.cast(
                0.9 * adj_initial_learning_rate /
                (FLAGS.warmup_epochs + FLAGS.learning_rate_decay_epochs - 1),
                tf.float32)
            epoch_offset = tf.cast(FLAGS.cold_epochs - 1, tf.int32)
            exp_decay_start = (FLAGS.warmup_epochs + FLAGS.cold_epochs +
                               FLAGS.learning_rate_decay_epochs)
            lin_inc_lr = tf.add(
                wlr,
                tf.multiply(
                    tf.cast(tf.subtract(current_epoch, epoch_offset),
                            tf.float32), wlr_height))
            learning_rate = tf.where(
                tf.greater_equal(current_epoch, FLAGS.cold_epochs),
                (tf.where(tf.greater_equal(current_epoch, exp_decay_start),
                          learning_rate, lin_inc_lr)), wlr)

        # Set a minimum boundary for the learning rate.
        learning_rate = tf.maximum(learning_rate,
                                   final_learning_rate,
                                   name='learning_rate')

        if FLAGS.optimizer == 'sgd':
            tf.logging.info('Using SGD optimizer')
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=learning_rate)
        elif FLAGS.optimizer == 'momentum':
            tf.logging.info('Using Momentum optimizer')
            optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                                   momentum=0.9)
        elif FLAGS.optimizer == 'RMS':
            tf.logging.info('Using RMS optimizer')
            optimizer = tf.train.RMSPropOptimizer(learning_rate,
                                                  RMSPROP_DECAY,
                                                  momentum=RMSPROP_MOMENTUM,
                                                  epsilon=RMSPROP_EPSILON)
        else:
            tf.logging.fatal('Unknown optimizer: %s', FLAGS.optimizer)

        if FLAGS.use_tpu:
            optimizer = contrib_tpu.CrossShardOptimizer(optimizer)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss, global_step=global_step)
        if FLAGS.moving_average:
            ema = tf.train.ExponentialMovingAverage(decay=MOVING_AVERAGE_DECAY,
                                                    num_updates=global_step)
            variables_to_average = (tf.trainable_variables() +
                                    tf.moving_average_variables())
            with tf.control_dependencies([train_op
                                          ]), tf.name_scope('moving_average'):
                train_op = ema.apply(variables_to_average)

        # To log the loss, current learning rate, and epoch for Tensorboard, the
        # summary op needs to be run on the host CPU via host_call. host_call
        # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
        # dimension. These Tensors are implicitly concatenated to
        # [params['batch_size']].
        gs_t = tf.reshape(global_step, [1])
        loss_t = tf.reshape(loss, [1])
        lr_t = tf.reshape(learning_rate, [1])
        ce_t = tf.reshape(current_epoch, [1])

        if not FLAGS.skip_host_call:

            def host_call_fn(gs, loss, lr, ce):
                """Training host call. Creates scalar summaries for training metrics.

        This function is executed on the CPU and should not directly reference
        any Tensors in the rest of the `model_fn`. To pass Tensors from the
        model to the `metric_fn`, provide them as part of the `host_call`. See
        https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
        for more information.

        Arguments should match the list of `Tensor` objects passed as the second
        element in the tuple passed to `host_call`.

        Args:
          gs: `Tensor` with shape `[batch]` for the global_step.
          loss: `Tensor` with shape `[batch]` for the training loss.
          lr: `Tensor` with shape `[batch]` for the learning_rate.
          ce: `Tensor` with shape `[batch]` for the current_epoch.

        Returns:
          List of summary ops to run on the CPU host.
        """
                gs = gs[0]
                with summary.create_file_writer(FLAGS.model_dir).as_default():
                    with summary.always_record_summaries():
                        summary.scalar('loss', tf.reduce_mean(loss), step=gs)
                        summary.scalar('learning_rate',
                                       tf.reduce_mean(lr),
                                       step=gs)
                        summary.scalar('current_epoch',
                                       tf.reduce_mean(ce),
                                       step=gs)

                        return summary.all_summary_ops()

            host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])

    eval_metrics = None
    if is_eval:

        def metric_fn(labels, logits):
            """Evaluation metric function. Evaluates accuracy.

      This function is executed on the CPU and should not directly reference
      any Tensors in the rest of the `model_fn`. To pass Tensors from the model
      to the `metric_fn`, provide as part of the `eval_metrics`. See
      https://www.tensorflow.org/api_docs/python/tf/contrib/tpu/TPUEstimatorSpec
      for more information.

      Arguments should match the list of `Tensor` objects passed as the second
      element in the tuple passed to `eval_metrics`.

      Args:
        labels: `Tensor` with shape `[batch, ]`.
        logits: `Tensor` with shape `[batch, num_classes]`.

      Returns:
        A dict of the metrics to return from evaluation.
      """
            predictions = tf.argmax(logits, axis=1)
            top_1_accuracy = tf.metrics.accuracy(labels, predictions)
            in_top_5 = tf.cast(tf.nn.in_top_k(logits, labels, 5), tf.float32)
            top_5_accuracy = tf.metrics.mean(in_top_5)

            return {
                'accuracy': top_1_accuracy,
                'accuracy@5': top_5_accuracy,
            }

        eval_metrics = (metric_fn, [labels, logits])

    return contrib_tpu.TPUEstimatorSpec(mode=mode,
                                        loss=loss,
                                        train_op=train_op,
                                        host_call=host_call,
                                        eval_metrics=eval_metrics)
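
For context, a hedged sketch of how a model_fn like this is usually handed to a TPUEstimator; the run-config fields, FLAGS.master, FLAGS.iterations_per_loop, the input_perm value, train_input_fn and FLAGS.train_steps are placeholders, not taken from the original script:

# Placeholder wiring; real scripts typically build RunConfig from a TPU
# cluster resolver plus many more flags.
run_config = contrib_tpu.RunConfig(
    master=FLAGS.master,
    model_dir=FLAGS.model_dir,
    tpu_config=contrib_tpu.TPUConfig(
        iterations_per_loop=FLAGS.iterations_per_loop))

classifier = contrib_tpu.TPUEstimator(
    model_fn=inception_model_fn,
    use_tpu=FLAGS.use_tpu,
    config=run_config,
    train_batch_size=FLAGS.train_batch_size,
    eval_batch_size=FLAGS.eval_batch_size,
    params={'input_perm': [0, 1, 2, 3]})  # placeholder permutation

# classifier.train(input_fn=train_input_fn, max_steps=FLAGS.train_steps)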
def apply_customized_matrix_compression(
        matrix_compression_obj,  # pylint:disable=invalid-name
        weight_params_fn,
        weight_init_obj,
        layer_obj,
        weight_name,
        weight_shape,
        weight_dtype,
        scope_name='pruning_interface',
        spec=None):
    """Apply pruning or compression to a lingvo layer.

  This provides a unified interface to perform pruning or compression for a
  lingvo layer.

  Args:
    matrix_compression_obj: A Pruning or
      compression_lib.lingvo_compression_op.ApplyCompression object;
    weight_params_fn: functional handle to create model parameters;
    weight_init_obj: a weight initialization object;
    layer_obj: a layer object in the lingvo package, weight matrix of this
      layer is pruned or compressed;
    weight_name: name of the tensor that is compressed, str;
    weight_shape: shape of the weight matrix;
    weight_dtype: data type of the weight matrix;
    scope_name: TensorFlow scope for creating relevant variables.
    spec: spec to use for the compression op.

  Returns:
    None.
  """
    if isinstance(matrix_compression_obj, pruning.Pruning):
        prune_option = matrix_compression_obj.matrix_compression_spec.prune_option

        with tf.variable_scope(scope_name):
            # Create mask and threshold variable and add them to pruning collection.
            mask_pc = weight_params_fn(weight_shape,
                                       weight_init_obj.Constant(1.0),
                                       weight_dtype)
            threshold_pc = weight_params_fn([], weight_init_obj.Constant(0.0),
                                            tf.float32)
            layer_obj.CreateVariable('mask', mask_pc, trainable=False)
            layer_obj.CreateVariable('threshold',
                                     threshold_pc,
                                     trainable=False)
            if layer_obj.vars.mask not in tf.get_collection(
                    pruning.MASK_COLLECTION):
                tf.add_to_collection(pruning.WEIGHT_COLLECTION,
                                     getattr(layer_obj.vars, weight_name))
                tf.add_to_collection(pruning.MASK_COLLECTION,
                                     layer_obj.vars.mask)
                tf.add_to_collection(pruning.THRESHOLD_COLLECTION,
                                     layer_obj.vars.threshold)
            if prune_option in [
                    'first_order_gradient', 'second_order_gradient'
            ]:
                grad_pc = weight_params_fn(weight_shape,
                                           weight_init_obj.Constant(0.0),
                                           weight_dtype)
                layer_obj.CreateVariable('gradient', grad_pc, trainable=False)
                layer_obj.CreateVariable('old_weight',
                                         grad_pc,
                                         trainable=False)
                layer_obj.CreateVariable('old_old_weight',
                                         grad_pc,
                                         trainable=False)
                tf.add_to_collection(pruning.WEIGHT_GRADIENT_COLLECTION,
                                     layer_obj.vars.gradient)
                tf.add_to_collection(pruning.OLD_WEIGHT_COLLECTION,
                                     layer_obj.vars.old_weight)
                tf.add_to_collection(pruning.OLD_OLD_WEIGHT_COLLECTION,
                                     layer_obj.vars.old_old_weight)
    else:
        _ = matrix_compression_obj.customized_apply_compression(
            getattr(layer_obj.vars, weight_name),
            layer_obj,
            weight_params_fn,
            weight_init_obj,
            scope=scope_name,
            spec=spec)
        hparams = matrix_compression_obj.get_spec()
        if hparams.use_collection:
            tf.add_to_collection(UPDATE_OP_COLLECTION,
                                 matrix_compression_obj.all_update_op())
Example #18
def resnet_model_fn(features, labels, mode, params):
  """The model_fn for ResNet to be used with TPUEstimator.

  Args:
    features: `Tensor` of batched images. If transpose_input is enabled, it
        is transposed to device layout and reshaped to 1D tensor.
    labels: `Tensor` of labels for the data samples
    mode: one of `tf.estimator.ModeKeys.{TRAIN,EVAL,PREDICT}`
    params: `dict` of parameters passed to the model from the TPUEstimator,
        `params['batch_size']` is always provided and should be used as the
        effective batch size.

  Returns:
    A `TPUEstimatorSpec` for the model
  """
  if isinstance(features, dict):
    features = features['feature']

  # In most cases, the default data format NCHW instead of NHWC should be
  # used for a significant performance boost on GPU/TPU. NHWC should be used
  # only if the network needs to be run on CPU since the pooling operations
  # are only supported on NHWC.
  if params['data_format'] == 'channels_first':
    assert not params['transpose_input']    # channels_first only for GPU
    features = tf.transpose(features, [0, 3, 1, 2])

  if params['transpose_input'] and mode != tf.estimator.ModeKeys.PREDICT:
    image_size = params['image_size']
    features = tf.reshape(features, [image_size, image_size, 1, -1])
    features = tf.transpose(features, [3, 0, 1, 2])  # HWCN to NHWC

  # DropBlock keep_prob for the 4 block groups of ResNet architecture.
  # None means applying no DropBlock at the corresponding block group.
  dropblock_keep_probs = [None] * 4
  if params['dropblock_groups']:
    # Scheduled keep_prob for DropBlock.
    train_steps = tf.cast(params['train_steps'], tf.float32)
    current_step = tf.cast(tf.train.get_global_step(), tf.float32)
    current_ratio = current_step / train_steps
    dropblock_keep_prob = (1 - current_ratio * (
        1 - params['dropblock_keep_prob']))

    # Computes DropBlock keep_prob for different block groups of ResNet.
    dropblock_groups = [int(x) for x in params['dropblock_groups'].split(',')]
    for block_group in dropblock_groups:
      if block_group < 1 or block_group > 4:
        raise ValueError(
            'dropblock_groups should be a comma separated list of integers '
            'between 1 and 4 (dropblock_groups: {}).'
            .format(params['dropblock_groups']))
      dropblock_keep_probs[block_group - 1] = 1 - (
          (1 - dropblock_keep_prob) / 4.0**(4 - block_group))

  # This nested function allows us to avoid duplicating the logic which
  # builds the network, for different values of --precision.
  def build_network():
    network = resnet_model.resnet_v1(
        resnet_depth=params['resnet_depth'],
        num_classes=params['num_label_classes'],
        dropblock_size=params['dropblock_size'],
        dropblock_keep_probs=dropblock_keep_probs,
        data_format=params['data_format'])
    return network(
        inputs=features, is_training=(mode == tf.estimator.ModeKeys.TRAIN))

  # Compute the summary statistic
  if params['precision'] == 'bfloat16':
    with tf.tpu.bfloat16_scope():
      sum_stat = build_network()
    sum_stat = tf.cast(sum_stat, tf.float32)
  elif params['precision'] == 'float32':
    sum_stat = build_network()

  if mode == tf.estimator.ModeKeys.PREDICT:
    predictions = {
        'summary': sum_stat,
    }
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        export_outputs={
            'inference': tf.estimator.export.PredictOutput(predictions)
        })

  n = params['num_label_classes']

  # If necessary, in the model_fn, use params['batch_size'] instead the batch
  # size flags (--train_batch_size or --eval_batch_size).
  batch_size = params['batch_size']   # pylint: disable=unused-variable

  # Add a little bit of scatter to the labels to smooth out the distribution
  if (params['label_smoothing'] > 0.) and (mode == tf.estimator.ModeKeys.TRAIN):
    labels += params['label_smoothing']*tf.random_normal(shape=[batch_size, n])


  # Now build a conditional density estimator from this density
  # Defines the chain of bijective transforms
  if params['training_loss'] == 'VMIM':
    net = sum_stat
    # Below is the chain for a MAF
    chain = [
        tfp.bijectors.MaskedAutoregressiveFlow(
            shift_and_log_scale_fn=masked_autoregressive_conditional_template(
                hidden_layers=[128, 128],
                conditional_tensor=net,
                shift_only=False)),
        tfb.Permute(np.arange(n)[::-1]),
        tfp.bijectors.MaskedAutoregressiveFlow(
            shift_and_log_scale_fn=masked_autoregressive_conditional_template(
                hidden_layers=[128, 128],
                conditional_tensor=net,
                shift_only=False)),
        tfb.Permute(np.arange(n)[::-1]),
        tfp.bijectors.MaskedAutoregressiveFlow(
            shift_and_log_scale_fn=masked_autoregressive_conditional_template(
                hidden_layers=[128, 128],
                conditional_tensor=net,
                shift_only=True)),
        tfb.Permute(np.arange(n)[::-1]),
        tfp.bijectors.MaskedAutoregressiveFlow(
            shift_and_log_scale_fn=masked_autoregressive_conditional_template(
                hidden_layers=[128, 128],
                conditional_tensor=net,
                shift_only=True)),
    ]

    bij = tfb.Chain(chain)
    prior = tfd.MultivariateNormalDiag(loc=tf.zeros(n),
                                       scale_identity_multiplier=1.0)
    distribution = tfd.TransformedDistribution(prior, bijector=bij)
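    # Illustrative note (not part of the original snippet): `distribution` is a
    # normalizing flow conditioned on the network output `net`; log_prob(labels)
    # below returns one log-density per example, so the loss is simply the
    # negative mean of those values.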

    # Compute the negative log-likelihood loss (L2 weight decay is added
    # further below).
    loss = -tf.reduce_mean(distribution.log_prob(labels), axis=0)
  elif params['training_loss'] == 'MAE':
    loss = tf.reduce_mean(tf.keras.losses.mae(labels, sum_stat), axis=0)
  elif params['training_loss'] == 'MSE':
    loss = tf.reduce_mean(tf.keras.losses.mse(labels, sum_stat), axis=0)
  else:
    raise NotImplementedError(
        'Unknown training_loss: {}'.format(params['training_loss']))

  # Add weight decay to the loss for non-batch-normalization variables. When
  # LARS is enabled, weight decay is expected to be handled by the optimizer,
  # so no extra term is added here.
  if params['enable_lars']:
    loss = loss
  else:
    loss = loss + params['weight_decay'] * tf.add_n([
        tf.nn.l2_loss(v)
        for v in tf.trainable_variables()
        if 'batch_normalization' not in v.name
    ])

  host_call = None
  if mode == tf.estimator.ModeKeys.TRAIN:
    # Compute the current epoch and associated learning rate from global_step.
    global_step = tf.train.get_global_step()
    steps_per_epoch = params['num_train_images'] / params['train_batch_size']
    current_epoch = (tf.cast(global_step, tf.float32) /
                     steps_per_epoch)
    # LARS is a large-batch optimizer; it enables higher accuracy at batch
    # sizes of 16K and above.
    if params['enable_lars']:
      learning_rate = 0.0
      optimizer = lars_util.init_lars_optimizer(current_epoch, params)
    else:
      learning_rate = learning_rate_schedule(params, current_epoch)
      optimizer = tf.train.MomentumOptimizer(
          learning_rate=learning_rate,
          momentum=params['momentum'],
          use_nesterov=True)
    if params['use_tpu']:
      # When using TPU, wrap the optimizer with CrossShardOptimizer which
      # handles synchronization details between different TPU cores. To the
      # user, this should look like regular synchronous training.
      optimizer = tf.tpu.CrossShardOptimizer(optimizer)

    # Batch normalization requires UPDATE_OPS to be added as a dependency to
    # the train operation.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      train_op = optimizer.minimize(loss, global_step)
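    # Note: without the control dependency above, the moving mean/variance
    # updates produced by the batch normalization layers would never run,
    # because they are not on the computation path of `loss`.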

    if not params['skip_host_call']:
      def host_call_fn(gs, loss, lr, ce):
        """Training host call. Creates scalar summaries for training metrics.

        This function is executed on the CPU and should not directly reference
        any Tensors in the rest of the `model_fn`. To pass Tensors from the
        model to the `metric_fn`, provide as part of the `host_call`. See
        https://www.tensorflow.org/api_docs/python/tf/estimator/tpu/TPUEstimatorSpec
        for more information.

        Arguments should match the list of `Tensor` objects passed as the second
        element in the tuple passed to `host_call`.

        Args:
          gs: `Tensor` with shape `[batch]` for the global_step.
          loss: `Tensor` with shape `[batch]` for the training loss.
          lr: `Tensor` with shape `[batch]` for the learning_rate.
          ce: `Tensor` with shape `[batch]` for the current_epoch.

        Returns:
          List of summary ops to run on the CPU host.
        """
        gs = gs[0]
        # Host call fns are executed params['iterations_per_loop'] times after
        # one TPU loop is finished. Setting max_queue to the same value as the
        # number of iterations makes the summary writer flush the data to
        # storage only once per loop.
        with tf2.summary.create_file_writer(
            FLAGS.model_dir,
            max_queue=params['iterations_per_loop']).as_default():
          with tf2.summary.record_if(True):
            tf2.summary.scalar('loss', loss[0], step=gs)
            tf2.summary.scalar('learning_rate', lr[0], step=gs)
            tf2.summary.scalar('current_epoch', ce[0], step=gs)

          return tf.summary.all_v2_summary_ops()

      # To log the loss, current learning rate, and epoch for Tensorboard, the
      # summary op needs to be run on the host CPU via host_call. host_call
      # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
      # dimension. These Tensors are implicitly concatenated to
      # [params['batch_size']].
      gs_t = tf.reshape(global_step, [1])
      loss_t = tf.reshape(loss, [1])
      lr_t = tf.reshape(learning_rate, [1])
      ce_t = tf.reshape(current_epoch, [1])

      host_call = (host_call_fn, [gs_t, loss_t, lr_t, ce_t])

  else:
    train_op = None

  eval_metrics = None
  return tf.estimator.tpu.TPUEstimatorSpec(
      mode=mode,
      loss=loss,
      train_op=train_op,
      host_call=host_call,
      eval_metrics=eval_metrics)
Exemple #19
0
    def __init__(self):
        self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE
        self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
        self.num_classes = len(self.classes)
        self.learn_rate_init = cfg.TRAIN.LEARN_RATE_INIT
        self.learn_rate_end = cfg.TRAIN.LEARN_RATE_END
        self.first_stage_epochs = cfg.TRAIN.FISRT_STAGE_EPOCHS
        self.second_stage_epochs = cfg.TRAIN.SECOND_STAGE_EPOCHS
        self.warmup_periods = cfg.TRAIN.WARMUP_EPOCHS
        self.initial_weight = cfg.TRAIN.INITIAL_WEIGHT
        self.time = time.strftime('%Y-%m-%d-%H-%M-%S',
                                  time.localtime(time.time()))
        self.moving_ave_decay = cfg.YOLO.MOVING_AVE_DECAY
        self.max_bbox_per_scale = 150
        self.train_logdir = "./data/log/train"
        self.trainset = Dataset('train')
        self.testset = Dataset('test')
        self.steps_per_period = len(self.trainset)
        self.sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True))

        with tf.name_scope('define_input'):
            self.input_data = tf.placeholder(dtype=tf.float32,
                                             name='input_data')
            self.label_sbbox = tf.placeholder(dtype=tf.float32,
                                              name='label_sbbox')
            self.label_mbbox = tf.placeholder(dtype=tf.float32,
                                              name='label_mbbox')
            self.label_lbbox = tf.placeholder(dtype=tf.float32,
                                              name='label_lbbox')
            self.true_sbboxes = tf.placeholder(dtype=tf.float32,
                                               name='sbboxes')
            self.true_mbboxes = tf.placeholder(dtype=tf.float32,
                                               name='mbboxes')
            self.true_lbboxes = tf.placeholder(dtype=tf.float32,
                                               name='lbboxes')
            self.trainable = tf.placeholder(dtype=tf.bool, name='training')

        with tf.name_scope("define_loss"):
            self.model = YOLOV3(self.input_data, self.trainable)
            self.net_var = tf.global_variables()
            self.giou_loss, self.conf_loss, self.prob_loss = self.model.compute_loss(
                self.label_sbbox, self.label_mbbox, self.label_lbbox,
                self.true_sbboxes, self.true_mbboxes, self.true_lbboxes)
            self.loss = self.giou_loss + self.conf_loss + self.prob_loss

        with tf.name_scope('learn_rate'):
            self.global_step = tf.Variable(1.0,
                                           dtype=tf.float64,
                                           trainable=False,
                                           name='global_step')
            warmup_steps = tf.constant(self.warmup_periods *
                                       self.steps_per_period,
                                       dtype=tf.float64,
                                       name='warmup_steps')
            train_steps = tf.constant(
                (self.first_stage_epochs + self.second_stage_epochs) *
                self.steps_per_period,
                dtype=tf.float64,
                name='train_steps')
            self.learn_rate = tf.cond(
                pred=self.global_step < warmup_steps,
                true_fn=lambda: (self.global_step / warmup_steps *
                                 self.learn_rate_init),
                false_fn=lambda: (
                    self.learn_rate_end + 0.5 *
                    (self.learn_rate_init - self.learn_rate_end) *
                    (1 + tf.cos((self.global_step - warmup_steps) /
                                (train_steps - warmup_steps) * np.pi))))
            global_step_update = tf.assign_add(self.global_step, 1.0)

        with tf.name_scope("define_weight_decay"):
            moving_ave = tf.train.ExponentialMovingAverage(
                self.moving_ave_decay).apply(tf.trainable_variables())

        with tf.name_scope("define_first_stage_train"):
            self.first_stage_trainable_var_list = []
            for var in tf.trainable_variables():
                var_name = var.op.name
                var_name_mess = str(var_name).split('/')
                if var_name_mess[0] in [
                        'conv_sbbox', 'conv_mbbox', 'conv_lbbox'
                ]:
                    self.first_stage_trainable_var_list.append(var)

            first_stage_optimizer = tf.train.AdamOptimizer(
                self.learn_rate).minimize(
                    self.loss, var_list=self.first_stage_trainable_var_list)
            with tf.control_dependencies(
                    tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                with tf.control_dependencies(
                    [first_stage_optimizer, global_step_update]):
                    with tf.control_dependencies([moving_ave]):
                        self.train_op_with_frozen_variables = tf.no_op()

        with tf.name_scope("define_second_stage_train"):
            second_stage_trainable_var_list = tf.trainable_variables()
            second_stage_optimizer = tf.train.AdamOptimizer(
                self.learn_rate).minimize(
                    self.loss, var_list=second_stage_trainable_var_list)

            with tf.control_dependencies(
                    tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                with tf.control_dependencies(
                    [second_stage_optimizer, global_step_update]):
                    with tf.control_dependencies([moving_ave]):
                        self.train_op_with_all_variables = tf.no_op()

        with tf.name_scope('loader_and_saver'):
            self.loader = tf.train.Saver(self.net_var)
            self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

        with tf.name_scope('summary'):
            tf.summary.scalar("learn_rate", self.learn_rate)
            tf.summary.scalar("giou_loss", self.giou_loss)
            tf.summary.scalar("conf_loss", self.conf_loss)
            tf.summary.scalar("prob_loss", self.prob_loss)
            tf.summary.scalar("total_loss", self.loss)

            logdir = "./data/log/"
            if os.path.exists(logdir): shutil.rmtree(logdir)
            os.mkdir(logdir)
            self.write_op = tf.summary.merge_all()
            self.summary_writer = tf.summary.FileWriter(logdir,
                                                        graph=self.sess.graph)
Exemple #20
0
  def model_fn(features, labels, mode, params=None): #//@follow-up Estimator Evaluation (7)
    """Build model and optimizer."""
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # Check training mode.
    if FLAGS.train_mode == 'pretrain':
      num_transforms = 2
      if FLAGS.fine_tune_after_block > -1:
        raise ValueError('Does not support layer freezing during pretraining, '
                         'should set fine_tune_after_block<=-1 for safety.')
    elif FLAGS.train_mode == 'finetune': #//@follow-up Estimator Evaluation (8)
      num_transforms = 1
    #boostx add predict
    elif FLAGS.train_mode == 'predict':  #//@audit predict
      predictions, endpoints = model(features["image"], tf.estimator.ModeKeys.TRAIN)
      _, top_5 = tf.nn.top_k(predictions, k=5)
      predictions = {
          'top_1': tf.argmax(predictions, -1),
          'top_5': top_5,
          'probabilities': tf.nn.softmax(predictions),
          'logits': predictions,
      }
      return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    #boostx:end 
    else:
      raise ValueError('Unknown train_mode {}'.format(FLAGS.train_mode))

    # Split channels, and optionally apply extra batched augmentation. #//@follow-up Estimator Evaluation (9)
    features_list = tf.split(  
        features, num_or_size_splits=num_transforms, axis=-1)
    if FLAGS.use_blur and is_training and FLAGS.train_mode == 'pretrain':
      features_list = data_util.batch_random_blur(
          features_list, FLAGS.image_size, FLAGS.image_size)
    features = tf.concat(features_list, 0)  # (num_transforms * bsz, h, w, c)

    # Base network forward pass.
    with tf.variable_scope('base_model'):
      if FLAGS.train_mode == 'finetune' and FLAGS.fine_tune_after_block >= 4:
        # Finetuning just the supervised (linear) head will not update BN stats.
        model_train_mode = False
      else:
        # Pretraining, or finetuning anything else, will update BN stats.
        model_train_mode = is_training
      hiddens = model(features, is_training=model_train_mode)

    # Add head and loss.
    if FLAGS.train_mode == 'pretrain':
      tpu_context = params['context'] if 'context' in params else None
      hiddens_proj = model_util.projection_head(hiddens, is_training)
      contrast_loss, logits_con, labels_con = obj_lib.add_contrastive_loss(
          hiddens_proj,
          hidden_norm=FLAGS.hidden_norm,
          temperature=FLAGS.temperature,
          tpu_context=tpu_context if is_training else None)
      logits_sup = tf.zeros([params['batch_size'], num_classes])
    else:
      contrast_loss = tf.zeros([])
      logits_con = tf.zeros([params['batch_size'], 10])
      labels_con = tf.zeros([params['batch_size'], 10])
      logits_sup = model_util.supervised_head(
          hiddens, num_classes, is_training)
      obj_lib.add_supervised_loss(
          labels=labels['labels'],
          logits=logits_sup,
          weights=labels['mask'])

    # Add weight decay to loss, for non-LARS optimizers.
    model_util.add_weight_decay(adjust_per_optimizer=True)
    loss = tf.losses.get_total_loss()

    if FLAGS.train_mode == 'pretrain':
      variables_to_train = tf.trainable_variables()
    else:
      collection_prefix = 'trainable_variables_inblock_'
      variables_to_train = []
      for j in range(FLAGS.fine_tune_after_block + 1, 6):
        variables_to_train += tf.get_collection(collection_prefix + str(j))
      assert variables_to_train, 'variables_to_train shouldn\'t be empty!'

    tf.logging.info('===============Variables to train (begin)===============')
    tf.logging.info(variables_to_train)
    tf.logging.info('================Variables to train (end)================')

    learning_rate = model_util.learning_rate_schedule(
        FLAGS.learning_rate, num_train_examples)

    if is_training:
      if FLAGS.train_summary_steps > 0:
        # Compute stats for the summary.
        prob_con = tf.nn.softmax(logits_con)
        entropy_con = - tf.reduce_mean(
            tf.reduce_sum(prob_con * tf.math.log(prob_con + 1e-8), -1))

        summary_writer = tf2.summary.create_file_writer(FLAGS.model_dir)
        # TODO(iamtingchen): remove this control_dependencies in the future.
        with tf.control_dependencies([summary_writer.init()]):
          with summary_writer.as_default():
            should_record = tf.math.equal(
                tf.math.floormod(tf.train.get_global_step(),
                                 FLAGS.train_summary_steps), 0)
            with tf2.summary.record_if(should_record):
              contrast_acc = tf.equal(
                  tf.argmax(labels_con, 1), tf.argmax(logits_con, axis=1))
              contrast_acc = tf.reduce_mean(tf.cast(contrast_acc, tf.float32))
              label_acc = tf.equal(
                  tf.argmax(labels['labels'], 1), tf.argmax(logits_sup, axis=1))
              label_acc = tf.reduce_mean(tf.cast(label_acc, tf.float32))
              tf2.summary.scalar(
                  'train_contrast_loss',
                  contrast_loss,
                  step=tf.train.get_global_step())
              tf2.summary.scalar(
                  'train_contrast_acc',
                  contrast_acc,
                  step=tf.train.get_global_step())
              tf2.summary.scalar(
                  'train_label_accuracy',
                  label_acc,
                  step=tf.train.get_global_step())
              tf2.summary.scalar(
                  'contrast_entropy',
                  entropy_con,
                  step=tf.train.get_global_step())
              tf2.summary.scalar(
                  'learning_rate', learning_rate,
                  step=tf.train.get_global_step())
              tf2.summary.scalar(
                  'input_mean',
                  tf.reduce_mean(features),
                  step=tf.train.get_global_step())
              tf2.summary.scalar(
                  'input_max',
                  tf.reduce_max(features),
                  step=tf.train.get_global_step())
              tf2.summary.scalar(
                  'input_min',
                  tf.reduce_min(features),
                  step=tf.train.get_global_step())
              tf2.summary.scalar(
                  'num_labels',
                  tf.reduce_mean(tf.reduce_sum(labels['labels'], -1)),
                  step=tf.train.get_global_step())

      if FLAGS.optimizer == 'momentum':
        optimizer = tf.train.MomentumOptimizer(
            learning_rate, FLAGS.momentum, use_nesterov=True)
      elif FLAGS.optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer(
            learning_rate)
      elif FLAGS.optimizer == 'lars':
        optimizer = LARSOptimizer(
            learning_rate,
            momentum=FLAGS.momentum,
            weight_decay=FLAGS.weight_decay,
            exclude_from_weight_decay=['batch_normalization', 'bias'])
      else:
        raise ValueError('Unknown optimizer {}'.format(FLAGS.optimizer))

      if FLAGS.use_tpu:
        optimizer = tf.tpu.CrossShardOptimizer(optimizer)

      control_deps = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      if FLAGS.train_summary_steps > 0:
        control_deps.extend(tf.summary.all_v2_summary_ops())
      with tf.control_dependencies(control_deps):
        train_op = optimizer.minimize(
            loss, global_step=tf.train.get_or_create_global_step(),
            var_list=variables_to_train)

      if FLAGS.checkpoint:
        def scaffold_fn():
          """Scaffold function to restore non-logits vars from checkpoint."""
          tf.train.init_from_checkpoint(
              FLAGS.checkpoint,
              {v.op.name: v.op.name
               for v in tf.global_variables(FLAGS.variable_schema)})

          if FLAGS.zero_init_logits_layer:
            # Init op that initializes output layer parameters to zeros.
            output_layer_parameters = [
                var for var in tf.trainable_variables() if var.name.startswith(
                    'head_supervised')]
            tf.logging.info('Initializing output layer parameters %s to zero',
                            [x.op.name for x in output_layer_parameters])
            with tf.control_dependencies([tf.global_variables_initializer()]):
              init_op = tf.group([
                  tf.assign(x, tf.zeros_like(x))
                  for x in output_layer_parameters])
            return tf.train.Scaffold(init_op=init_op)
          else:
            return tf.train.Scaffold()
      else:
        scaffold_fn = None

      return tf.estimator.tpu.TPUEstimatorSpec(
          mode=mode, train_op=train_op, loss=loss, scaffold_fn=scaffold_fn)
    else:
        
      def metric_fn(logits_sup, labels_sup, logits_con, labels_con, mask,
                    **kws):  #//@follow-up metric_fn (0)
        """Inner metric function."""
        metrics = {k: tf.metrics.mean(v, weights=mask)
                   for k, v in kws.items()}
        metrics['label_top_1_accuracy'] = tf.metrics.accuracy(
            tf.argmax(labels_sup, 1), tf.argmax(logits_sup, axis=1),
            weights=mask)
        metrics['label_top_5_accuracy'] = tf.metrics.recall_at_k(
            tf.argmax(labels_sup, 1), logits_sup, k=5, weights=mask)
        metrics['contrastive_top_1_accuracy'] = tf.metrics.accuracy(
            tf.argmax(labels_con, 1), tf.argmax(logits_con, axis=1),
            weights=mask)
        metrics['contrastive_top_5_accuracy'] = tf.metrics.recall_at_k(
            tf.argmax(labels_con, 1), logits_con, k=5, weights=mask)
        #//@audit save the predicted (label, logit) to logfile
        #import sys
        #tf.logging.info(labels_sup)
        #tf.print(labels_sup,output_stream=sys.stdout)
        metrics['boostx_recall'] = tf.metrics.recall(
            tf.argmax(labels_sup, 1), tf.argmax(logits_sup, axis=1), weights=mask)

        return metrics

      metrics = {
          'logits_sup': logits_sup,
          'labels_sup': labels['labels'],
          'logits_con': logits_con,
          'labels_con': labels_con,
          'mask': labels['mask'],
          'contrast_loss': tf.fill((params['batch_size'],), contrast_loss),
          'regularization_loss': tf.fill((params['batch_size'],),
                                         tf.losses.get_regularization_loss()),
      }

      return tf.estimator.tpu.TPUEstimatorSpec(
          mode=mode,
          loss=loss,
          eval_metrics=(metric_fn, metrics), #//@follow-up metric_fn (-1)
          scaffold_fn=None)
  def get_customized_apply_compression_op(self,
                                          a_matrix_tfvar,
                                          matrix_compressor,
                                          layer_obj,
                                          weight_params_fn,
                                          weight_init_obj,
                                          scope='default_scope'):
    """Returns pruning + kmeans compressed operator for a customized layer.

    Args:
      a_matrix_tfvar: TF variable representing a tensor variable in a model.
      matrix_compressor: MatrixCompressorInferface object to specify the
        compression algorithm. Must return two matrices b_matrix, c_matrix in
        its compression.
      layer_obj: a customized layer object that handles variable creation.
      weight_params_fn: functional handle to create model parameters.
      weight_init_obj: a weight initialization object.
      scope: TF scope used for creating new TF variables.

    Returns:
      A TF node that has the compressed version of a_matrix_tfvar.
    """
    self.matrix_compressor = matrix_compressor
    a_matrix = np.zeros(shape=a_matrix_tfvar.shape)
    if getattr(self._spec, 'do_transpose', False):
      a_matrix = np.transpose(a_matrix)
    [b_matrix, c_matrix] = matrix_compressor.static_matrix_compressor(a_matrix)

    self.uncompressed_size = matrix_compressor.uncompressed_size
    self.compressed_size = matrix_compressor.compressed_size

    p = layer_obj.params
    with tf.variable_scope(scope) as scope:
      # Create pruning relevant variables.
      mask_pc = weight_params_fn(a_matrix.shape, weight_init_obj.Constant(1.0),
                                 p.dtype)
      threshold_pc = weight_params_fn([], weight_init_obj.Constant(0.0),
                                      tf.float32)
      self._create_layer_variable(layer_obj, 'mask', mask_pc, None, False)
      self._create_layer_variable(layer_obj, 'threshold', threshold_pc, None,
                                  False)
      if layer_obj.vars.mask not in tf.get_collection(pruning.MASK_COLLECTION):
        tf.add_to_collection(pruning.WEIGHT_COLLECTION, layer_obj.vars.wm)
        tf.add_to_collection(pruning.MASK_COLLECTION, layer_obj.vars.mask)
        tf.add_to_collection(pruning.THRESHOLD_COLLECTION,
                             layer_obj.vars.threshold)
      if self.pruning_obj.get_spec().prune_option in [
          'first_order_gradient', 'second_order_gradient'
      ]:
        grad_pc = weight_params_fn(a_matrix.shape,
                                   weight_init_obj.Constant(0.0), p.dtype)
        self._create_layer_variable(layer_obj, 'gradient', grad_pc, None, False)
        self._create_layer_variable(layer_obj, 'old_weight', grad_pc, None,
                                    False)
        self._create_layer_variable(layer_obj, 'old_old_weight', grad_pc, None,
                                    False)
        tf.add_to_collection(pruning.WEIGHT_GRADIENT_COLLECTION,
                             layer_obj.vars.gradient)
        tf.add_to_collection(pruning.OLD_WEIGHT_COLLECTION,
                             layer_obj.vars.old_weight)
        tf.add_to_collection(pruning.OLD_OLD_WEIGHT_COLLECTION,
                             layer_obj.vars.old_old_weight)

      b_matrix_pc = weight_params_fn(b_matrix.shape,
                                     weight_init_obj.Constant(1.0), p.dtype)
      c_matrix_pc = weight_params_fn(c_matrix.shape,
                                     weight_init_obj.Constant(1), tf.int32)
      alpha_pc = weight_params_fn([], weight_init_obj.Constant(1.0), tf.float32)

      self._create_layer_variable(layer_obj, 'alpha', alpha_pc, None, False)
      self._create_layer_variable(
          layer_obj,
          'b_matrix_tfvar',
          b_matrix_pc,
          None,
          trainable=self.matrix_compressor.get_spec().is_b_matrix_trainable)
      self._create_layer_variable(
          layer_obj,
          'c_matrix_tfvar',
          c_matrix_pc,
          None,
          trainable=self.matrix_compressor.get_spec().is_c_matrix_trainable)

      self.b_matrix_tfvar = layer_obj.vars.b_matrix_tfvar
      self.c_matrix_tfvar = layer_obj.vars.c_matrix_tfvar
      self.alpha = layer_obj.vars.alpha
      self.a_matrix_tfvar = a_matrix_tfvar
      self.mask = layer_obj.vars.mask
      self.threshold = layer_obj.vars.threshold

      self.pruned_a_matrix_tfvar = tf.multiply(layer_obj.vars.wm,
                                               layer_obj.vars.mask,
                                               'masked_weight')

    def maybe_apply_compression():
      """Decide whether global step is within compression range.

      Returns:
        is_step_within_compression_range: bool.
      """
      with tf.compat.v1.name_scope(self._spec.name):
        # Compress if current step is more than begin_compression_step and
        # less than end_compression_step (unless it's negative)
        global_step = tf.train.get_global_step()
        def real_global_step_fn():
          return tf.cast(tf.train.get_global_step(), tf.int32)
        def mock_global_step_fn():
          return self._spec.begin_compression_step
        def is_global_step_none(global_step):
          return tf.constant(global_step is None, dtype=tf.bool)
        global_step = tf.cond(is_global_step_none(global_step),
                              mock_global_step_fn,
                              real_global_step_fn)
        is_step_within_compression_range = tf.logical_and(
            tf.greater_equal(
                tf.cast(global_step, tf.int32),
                self._spec.begin_compression_step),
            tf.logical_or(
                tf.less_equal(
                    tf.cast(global_step, tf.int32),
                    self._spec.end_compression_step),
                tf.less(self._spec.end_compression_step, 0)))
        return is_step_within_compression_range

    if getattr(self._spec, 'do_transpose', False):
      self.pruning_and_compression_op = (
          self.alpha * self.pruned_a_matrix_tfvar +
          (1 - self.alpha) * tf.math.multiply(
              tf.transpose(
                  tf.reshape(
                      tf.nn.embedding_lookup(self.b_matrix_tfvar,
                                             self.c_matrix_tfvar),
                      tf.transpose(a_matrix_tfvar).shape)),
              self.mask,
              name='pruned_compressed_weight'))
    else:
      self.pruning_and_compression_op = (
          self.alpha * self.pruned_a_matrix_tfvar +
          (1 - self.alpha) * tf.math.multiply(
              tf.reshape(
                  tf.nn.embedding_lookup(self.b_matrix_tfvar,
                                         self.c_matrix_tfvar),
                  a_matrix_tfvar.shape),
              self.mask,
              name='pruned_compressed_weight'))

    def pruned_a_matrix_fn():
      return self.pruned_a_matrix_tfvar

    def quantized_pruned_a_matrix_fn():
      return self.pruning_and_compression_op

    self.final_op = tf.cond(maybe_apply_compression(),
                            quantized_pruned_a_matrix_fn, pruned_a_matrix_fn)
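    # In words: final_op evaluates to the pruned weight alone until the global
    # step enters the compression range defined in the spec, after which it
    # becomes the alpha-blended mix of pruned and compressed weights built above.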

    self.add_compression_summaries()
    self.pruning_obj.add_pruning_summaries()
    self.update_op = tf.no_op()
    return [self.final_op, self.update_op]
Exemple #22
0
def get_layer_variables_by_scope(scope_name):
    ret = []
    for v in tf.get_collection(tf.GraphKeys.MODEL_VARIABLES):
        if scope_name + '/' in v.name:
            ret.append(v)
    return ret
Exemple #23
0
    def __init__(self,
                 sess,
                 env,
                 handle,
                 name,
                 update_every=5,
                 use_mf=False,
                 learning_rate=1e-4,
                 tau=0.005,
                 gamma=0.95):
        # assert isinstance(env, GridWorld)
        self.env = env
        self.name = name
        self._saver = None
        self.sess = sess

        self.handle = handle
        self.view_space = env.get_view_space(handle)
        assert len(self.view_space) == 3
        self.feature_space = env.get_feature_space(handle)
        self.num_actions = env.get_action_space(handle)[0]

        self.update_every = update_every
        self.use_mf = use_mf  # trigger of using mean field
        self.temperature = 0.1

        self.lr = learning_rate
        self.tau = tau
        self.gamma = gamma

        with tf.variable_scope(name or "ValueNet"):
            self.name_scope = tf.get_variable_scope().name
            self.obs_input = tf.placeholder(tf.float32,
                                            (None, ) + self.view_space,
                                            name="Obs-Input")
            self.feat_input = tf.placeholder(tf.float32,
                                             (None, ) + self.feature_space,
                                             name="Feat-Input")
            self.mask = tf.placeholder(tf.float32,
                                       shape=(None, ),
                                       name='Terminate-Mask')

            if self.use_mf:
                self.act_prob_input0 = tf.placeholder(tf.float32,
                                                      (None, self.num_actions),
                                                      name="Act-Prob-Input0")
                self.act_prob_input1 = tf.placeholder(tf.float32,
                                                      (None, self.num_actions),
                                                      name="Act-Prob-Input1")
                self.act_prob_input2 = tf.placeholder(tf.float32,
                                                      (None, self.num_actions),
                                                      name="Act-Prob-Input2")
                self.act_prob_input3 = tf.placeholder(tf.float32,
                                                      (None, self.num_actions),
                                                      name="Act-Prob-Input3")

            self.act_input = tf.placeholder(tf.int32, (None, ), name="Act")
            self.act_one_hot = tf.one_hot(self.act_input,
                                          depth=self.num_actions,
                                          on_value=1.0,
                                          off_value=0.0)

            with tf.variable_scope("Eval-Net"):
                self.eval_name = tf.get_variable_scope().name
                self.e_q = self._construct_net(active_func=tf.nn.relu)
                self.predict = tf.nn.softmax(self.e_q / self.temperature)
                self.e_variables = tf.get_collection(
                    tf.GraphKeys.GLOBAL_VARIABLES, scope=self.eval_name)

            with tf.variable_scope("Target-Net"):
                self.target_name = tf.get_variable_scope().name
                self.t_q = self._construct_net(active_func=tf.nn.relu)
                self.t_variables = tf.get_collection(
                    tf.GraphKeys.GLOBAL_VARIABLES, scope=self.target_name)

            with tf.variable_scope("Update"):
                self.update_op = [
                    tf.assign(
                        self.t_variables[i], self.tau * self.e_variables[i] +
                        (1. - self.tau) * self.t_variables[i])
                    for i in range(len(self.t_variables))
                ]
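                # The assignments above implement a soft (Polyak) target update:
                # theta_target <- tau * theta_eval + (1 - tau) * theta_target,
                # so the target network tracks the eval network slowly.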

            with tf.variable_scope("Optimization"):
                self.target_q_input = tf.placeholder(tf.float32, (None, ),
                                                     name="Q-Input")
                self.e_q_max = tf.reduce_sum(tf.multiply(
                    self.act_one_hot, self.e_q),
                                             axis=1)
                self.loss = tf.reduce_sum(
                    tf.square(self.target_q_input - self.e_q_max) *
                    self.mask) / tf.reduce_sum(self.mask)
                self.train_op = tf.train.AdamOptimizer(self.lr).minimize(
                    self.loss)
Exemple #24
0
def main(unused_argv):
    tf.logging.set_verbosity(FLAGS.log)

    if FLAGS.checkpoint_path:
        checkpoint_path = FLAGS.checkpoint_path
    else:
        expdir = FLAGS.expdir
        tf.logging.info("Will load latest checkpoint from %s.", expdir)
        if not tf.gfile.Exists(expdir):
            tf.logging.fatal("\tExperiment save dir '%s' does not exist!",
                             expdir)
            sys.exit(1)

        try:
            checkpoint_path = tf.train.latest_checkpoint(expdir)
        except tf.errors.NotFoundError:
            tf.logging.fatal(
                "There was a problem determining the latest checkpoint.")
            sys.exit(1)

    if not tf.train.checkpoint_exists(checkpoint_path):
        tf.logging.fatal("Invalid checkpoint path: %s", checkpoint_path)
        sys.exit(1)

    savedir = FLAGS.savedir
    if not tf.gfile.Exists(savedir):
        tf.gfile.MakeDirs(savedir)

    # Make the graph
    with tf.Graph().as_default():
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            model = utils.get_module("baseline.models.%s" % FLAGS.model)
            hparams = model.get_hparams(FLAGS.config)

            # Load the trained model with is_training=False
            with tf.name_scope("Reader"):
                batch = reader.NSynthDataset(
                    FLAGS.tfrecord_path,
                    is_training=False).get_baseline_batch(hparams)

            _ = model.train_op(batch, hparams, FLAGS.config)
            z = tf.get_collection("z")[0]

            init_op = tf.group(tf.global_variables_initializer(),
                               tf.local_variables_initializer())
            sess.run(init_op)

            # Add ops to save and restore all the variables.
            # Restore variables from disk.
            saver = tf.train.Saver()
            saver.restore(sess, checkpoint_path)
            tf.logging.info("Model restored.")

            # Start up some threads
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            i = 0
            z_val = []
            try:
                while True:
                    if coord.should_stop():
                        break
                    res_val = sess.run([z])
                    z_val.append(res_val[0])
                    tf.logging.info("Iter: %d" % i)
                    tf.logging.info("Z:{}".format(res_val[0].shape))
                    i += 1
                    # Save on every iteration; raise the modulus to save less often.
                    if (i + 1) % 1 == 0:
                        save_arrays(savedir, hparams, z_val)
            # Report all exceptions to the coordinator, pylint: disable=broad-except
            except Exception as e:
                coord.request_stop(e)
            # pylint: enable=broad-except
            finally:
                save_arrays(savedir, hparams, z_val)
                # Terminate as usual.  It is innocuous to request stop twice.
                coord.request_stop()
                coord.join(threads)
Exemple #25
0
def create_optimizer(loss, init_lr, num_train_steps, num_warmup_steps, use_tpu,
                     exclude_bert):
  """Creates an optimizer training op, optionally excluding BERT vars."""
  global_step = tf.train.get_or_create_global_step()

  learning_rate = tf.constant(value=init_lr, shape=[], dtype=tf.float32)

  # Implements linear decay of the learning rate.
  learning_rate = tf.train.polynomial_decay(
      learning_rate,
      global_step,
      num_train_steps,
      end_learning_rate=0.0,
      power=1.0,
      cycle=False)

  # Implements linear warmup. I.e., if global_step < num_warmup_steps, the
  # learning rate will be `global_step/num_warmup_steps * init_lr`.
  if num_warmup_steps:
    global_steps_int = tf.cast(global_step, tf.int32)
    warmup_steps_int = tf.constant(num_warmup_steps, dtype=tf.int32)

    global_steps_float = tf.cast(global_steps_int, tf.float32)
    warmup_steps_float = tf.cast(warmup_steps_int, tf.float32)

    warmup_percent_done = global_steps_float / warmup_steps_float
    warmup_learning_rate = init_lr * warmup_percent_done

    is_warmup = tf.cast(global_steps_int < warmup_steps_int, tf.float32)
    learning_rate = ((1.0 - is_warmup) * learning_rate +
                     is_warmup * warmup_learning_rate)
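    # Worked example (illustrative numbers): with init_lr = 3e-5 and
    # num_warmup_steps = 1000, the learning rate at global_step = 250 is
    # 0.25 * 3e-5 = 7.5e-6; once global_step >= 1000 the linear decay above
    # takes over.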

  # It is recommended that you use this optimizer for fine tuning, since this
  # is how the model was trained (note that the Adam m/v variables are NOT
  # loaded from init_checkpoint.)
  optimizer = optimization.AdamWeightDecayOptimizer(
      learning_rate=learning_rate,
      weight_decay_rate=0.01,
      beta_1=0.9,
      beta_2=0.999,
      epsilon=1e-6,
      exclude_from_weight_decay=["LayerNorm", "layer_norm", "bias"])

  if use_tpu:
    optimizer = tf_estimator.tpu.CrossShardOptimizer(optimizer)

  tvars = tf.trainable_variables()
  if exclude_bert:
    bert_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "bert")
    tvars = [vv for vv in tvars if vv not in bert_vars]

  tf.logging.info("Training the following variables:")
  for vv in tvars:
    tf.logging.info(vv.name)

  grads = tf.gradients(loss, tvars, colocate_gradients_with_ops=True)

  # This is how the model was pre-trained.
  (grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)

  train_op = optimizer.apply_gradients(
      zip(grads, tvars), global_step=global_step)

  new_global_step = global_step + 1
  train_op = tf.group(train_op, [global_step.assign(new_global_step)])
  return train_op
def find_trainable_variables(key):
    return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                             ".*{}.*".format(key))
Exemple #27
0
def main():
    if FLAGS.datasource == 'sinusoid':
        if FLAGS.train:
            test_num_updates = 1
        else:
            test_num_updates = 10
    else:
        if FLAGS.datasource == 'miniimagenet':
            if FLAGS.train:
                test_num_updates = 1  # eval on at least one update during training
            else:
                test_num_updates = 10
        else:
            test_num_updates = 10

    if not FLAGS.train:
        orig_meta_batch_size = FLAGS.meta_batch_size
        # always use meta batch size of 1 when testing.
        FLAGS.meta_batch_size = 1

    if FLAGS.datasource == 'sinusoid':
        data_generator = DataGenerator(FLAGS.update_batch_size * 2,
                                       FLAGS.meta_batch_size)
    else:
        if FLAGS.metatrain_iterations == 0 and FLAGS.datasource == 'miniimagenet':
            assert FLAGS.meta_batch_size == 1
            assert FLAGS.update_batch_size == 1
            data_generator = DataGenerator(
                1, FLAGS.meta_batch_size)  # only use one datapoint,
        else:
            if FLAGS.datasource == 'miniimagenet':  # TODO - use 15 val examples for imagenet?
                if FLAGS.train:
                    data_generator = DataGenerator(
                        FLAGS.update_batch_size + 15, FLAGS.meta_batch_size
                    )  # use 15 extra val examples per class during training
                else:
                    data_generator = DataGenerator(
                        FLAGS.update_batch_size * 2, FLAGS.meta_batch_size
                    )  # only use one datapoint for testing to save memory
            else:
                data_generator = DataGenerator(
                    FLAGS.update_batch_size * 2, FLAGS.meta_batch_size
                )  # only use one datapoint for testing to save memory

    dim_output = data_generator.dim_output
    if FLAGS.baseline == 'oracle':
        assert FLAGS.datasource == 'sinusoid'
        dim_input = 3
        FLAGS.pretrain_iterations += FLAGS.metatrain_iterations
        FLAGS.metatrain_iterations = 0
    else:
        dim_input = data_generator.dim_input

    if FLAGS.datasource == 'miniimagenet' or FLAGS.datasource == 'omniglot':
        tf_data_load = True
        num_classes = data_generator.num_classes

        if FLAGS.train:  # only construct training model if needed
            random.seed(5)
            image_tensor, label_tensor = data_generator.make_data_tensor()
            inputa = tf.slice(image_tensor, [0, 0, 0],
                              [-1, num_classes * FLAGS.update_batch_size, -1])
            inputb = tf.slice(image_tensor,
                              [0, num_classes * FLAGS.update_batch_size, 0],
                              [-1, -1, -1])
            labela = tf.slice(label_tensor, [0, 0, 0],
                              [-1, num_classes * FLAGS.update_batch_size, -1])
            labelb = tf.slice(label_tensor,
                              [0, num_classes * FLAGS.update_batch_size, 0],
                              [-1, -1, -1])
            input_tensors = {
                'inputa': inputa,
                'inputb': inputb,
                'labela': labela,
                'labelb': labelb
            }
            print("inputa shape", inputa.shape)

        random.seed(6)
        image_tensor, label_tensor = data_generator.make_data_tensor(
            train=False)
        inputa = tf.slice(image_tensor, [0, 0, 0],
                          [-1, num_classes * FLAGS.update_batch_size, -1])
        inputb = tf.slice(image_tensor,
                          [0, num_classes * FLAGS.update_batch_size, 0],
                          [-1, -1, -1])
        labela = tf.slice(label_tensor, [0, 0, 0],
                          [-1, num_classes * FLAGS.update_batch_size, -1])
        labelb = tf.slice(label_tensor,
                          [0, num_classes * FLAGS.update_batch_size, 0],
                          [-1, -1, -1])
        metaval_input_tensors = {
            'inputa': inputa,
            'inputb': inputb,
            'labela': labela,
            'labelb': labelb
        }
    else:
        tf_data_load = False
        input_tensors = None

    model = MAML(dim_input, dim_output, test_num_updates=test_num_updates)
    if FLAGS.train or not tf_data_load:
        model.construct_model(input_tensors=input_tensors, prefix='metatrain_')
    if tf_data_load:
        model.construct_model(input_tensors=metaval_input_tensors,
                              prefix='metaval_')
    model.summ_op = tf.summary.merge_all()

    saver = loader = tf.train.Saver(tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES),
                                    max_to_keep=10)

    sess = tf.InteractiveSession()

    if not FLAGS.train:
        # change to original meta batch size when loading model.
        FLAGS.meta_batch_size = orig_meta_batch_size

    if FLAGS.train_update_batch_size == -1:
        FLAGS.train_update_batch_size = FLAGS.update_batch_size
    if FLAGS.train_update_lr == -1:
        FLAGS.train_update_lr = FLAGS.update_lr

    exp_string = 'cls_' + str(FLAGS.num_classes) + '.mbs_' + str(
        FLAGS.meta_batch_size) + '.ubs_' + str(
            FLAGS.train_update_batch_size) + '.numstep' + str(
                FLAGS.num_updates) + '.updatelr' + str(FLAGS.train_update_lr)

    if FLAGS.num_filters != 64:
        exp_string += 'hidden' + str(FLAGS.num_filters)
    if FLAGS.max_pool:
        exp_string += 'maxpool'
    if FLAGS.stop_grad:
        exp_string += 'stopgrad'
    if FLAGS.baseline:
        exp_string += FLAGS.baseline
    if FLAGS.norm == 'batch_norm':
        exp_string += 'batchnorm'
    elif FLAGS.norm == 'layer_norm':
        exp_string += 'layernorm'
    elif FLAGS.norm == 'None':
        exp_string += 'nonorm'
    else:
        print('Norm setting not recognized.')

    resume_itr = 0
    model_file = None

    tf.global_variables_initializer().run()
    tf.train.start_queue_runners()

    if not FLAGS.rand_init:
        if FLAGS.resume or not FLAGS.train:
            model_file = tf.train.latest_checkpoint(FLAGS.logdir + '/' +
                                                    exp_string)
            if FLAGS.test_iter > 0:
                model_file = model_file[:model_file.index('model'
                                                          )] + 'model' + str(
                                                              FLAGS.test_iter)
            if model_file:
                ind1 = model_file.index('model')
                resume_itr = int(model_file[ind1 + 5:])
                print("Restoring model weights from " + model_file)
                saver.restore(sess, model_file)

    if FLAGS.train:
        train(model, saver, sess, exp_string, data_generator, resume_itr)
    else:
        test(model, saver, sess, exp_string, data_generator, test_num_updates)
    def train_graph(self, train_dict):
        """
        PURPOSE: Train a deep neural net classifier for baskets of products

        ARGS:
        train_dict          (dict) dictionary with ALL the following key values
            embeddings       (list(list)) trained product embedding layers
            sequences_train        (list(list)) training order product numbers
            labels_train        (list) training order class labels
            sequences_valid         (list(list)) test order product numbers
            labels_valid         (list) test order class
            batch_size          (int) number of training example per mini batch
            n_stop              (int) early stopping criteria
        """
        embeddings = train_dict.get('embeddings', None)
        sequences_train = train_dict.get('sequences_train', None)
        labels_train = train_dict.get('labels_train', None)
        sequences_valid = train_dict.get('sequences_valid', None)
        labels_valid = train_dict.get('labels_valid', None)
        batch_size = train_dict.get('batch_size', 100)
        n_stop = train_dict.get('n_stop', 5)

        n_train_ex = len(sequences_train)
        n_batches = n_train_ex // batch_size
        done, epoch, acc_reg = 0, 0, [0, 1]
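        # acc_reg tracks [best validation accuracy so far, number of checks
        # since the last improvement] and drives the early-stopping logic below.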

        with self.graph.as_default():
            correct_, accuracy_ = tf.get_collection('Eval_ops')
            acc_summary = tf.summary.scalar('Accuracy', accuracy_)
            file_writer = tf.summary.FileWriter(self.log_dir, self.graph)

        with tf.Session(graph=self.graph) as sess:
            init_, saver_ = tf.get_collection('Init_Save_ops')
            correct_, accuracy_ = tf.get_collection('Eval_ops')
            optimizer_, training_op_ = tf.get_collection("Optimizer_ops")
            sequences_, W_embed_, Y_, training_ = tf.get_collection(
                "Input_var")

            sess.run(init_)
            while done != 1:
                epoch += 1
                batches = self._partition_(list(range(n_train_ex)), n_batches)
                #Mini-Batch Training step
                for iteration in ProgressBar(
                        range(n_batches), 'Epoch {} Iterations'.format(epoch)):
                    sequences_batch = [
                        sequences_train[indx] for indx in batches[iteration]
                    ]
                    labels_batch = [
                        labels_train[indx] for indx in batches[iteration]
                    ]
                    sess.run(
                        [training_op_],
                        feed_dict={
                            training_: True,
                            W_embed_: embeddings,
                            sequences_: sequences_batch,
                            Y_: labels_batch
                        })
                    #Intermediate Summary Writing
                    if iteration % 10 == 0:
                        summary_str = acc_summary.eval(
                            feed_dict={
                                training_: False,
                                W_embed_: embeddings,
                                sequences_: sequences_valid,
                                Y_: labels_valid
                            })
                        step = epoch * n_batches + iteration
                        file_writer.add_summary(summary_str, step)
                #Early Stopping Regularization
                if epoch % 1 == 0:
                    # Evaluating the Accuracy of Current Model
                    acc_ckpt = accuracy_.eval(
                        feed_dict={
                            training_: False,
                            W_embed_: embeddings,
                            sequences_: sequences_valid,
                            Y_: labels_valid
                        })
                    if acc_ckpt > acc_reg[0]:
                        # Saving the new "best" model
                        save_path = saver_.save(sess, self.temp_ckpt)
                        acc_reg = [acc_ckpt, 1]
                    elif acc_ckpt <= acc_reg[0] and acc_reg[1] < n_stop:
                        acc_reg[1] += 1
                    elif acc_ckpt <= acc_reg[0] and acc_reg[1] >= n_stop:
                        #Restoring previous "best" model
                        saver_.restore(sess, self.temp_ckpt)
                        done = 1
                #Calculating Accuracy for Output
                acc_train = accuracy_.eval(
                    feed_dict={
                        training_: False,
                        W_embed_: embeddings,
                        sequences_: sequences_train,
                        Y_: labels_train
                    })
                acc_test = accuracy_.eval(
                    feed_dict={
                        training_: False,
                        W_embed_: embeddings,
                        sequences_: sequences_valid,
                        Y_: labels_valid
                    })
                print(
                    'Register:{} Epoch:{:2d} Train Accuracy:{:6.4f} Validation Accuracy: {:6.4f}'
                    .format(acc_reg, epoch, acc_train, acc_test))
                #Final Model Save
                save_path = saver_.save(sess, self.final_ckpt)
Exemple #29
0
    def model_fn(features, labels, mode, params):
        """Returns the model function."""
        feature = features['feature']
        print(feature)
        labels = labels['label']
        one_hot_labels = model_utils.get_label(
            labels,
            params,
            FLAGS.src_num_classes,
            batch_size=FLAGS.train_batch_size)

        def get_logits():
            """Return the logits."""
            avg_pool = model.conv_model(feature, mode)
            name = 'final_dense_dst'
            with tf.variable_scope('target_CLS'):
                logits = tf.layers.dense(inputs=avg_pool,
                                         units=FLAGS.src_num_classes,
                                         name=name)
            return logits

        logits = get_logits()
        logits = tf.cast(logits, tf.float32)

        dst_loss = tf.losses.softmax_cross_entropy(
            logits=logits,
            onehot_labels=one_hot_labels,
        )
        dst_l2_loss = FLAGS.weight_decay * tf.add_n([
            tf.nn.l2_loss(v) for v in tf.trainable_variables()
            if 'batch_normalization' not in v.name and 'kernel' in v.name
        ])

        loss = dst_loss + dst_l2_loss

        train_op = None
        if mode == tf.estimator.ModeKeys.TRAIN:
            cur_finetune_step = tf.train.get_global_step()
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                finetune_learning_rate = lr_schedule()
                optimizer = tf.train.AdamOptimizer(finetune_learning_rate)

                train_op = tf.contrib.slim.learning.create_train_op(
                    loss, optimizer)
                with tf.variable_scope('finetune'):
                    train_op = optimizer.minimize(loss, cur_finetune_step)
        else:
            train_op = None

        eval_metrics = None
        if mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = model_utils.metric_fn(labels, logits)

        if mode == tf.estimator.ModeKeys.TRAIN:
            with tf.control_dependencies([train_op]):
                tf.summary.scalar('classifier/finetune_lr',
                                  finetune_learning_rate)
        else:
            train_op = None

        return tf.estimator.EstimatorSpec(
            mode=mode,
            loss=loss,
            train_op=train_op,
            eval_metric_ops=eval_metrics,
        )
Exemple #30
0
# Use function binding to create all the builder functions that are needed:
bound_train_model = partial(model, lr_placeholder, outfeed_train_queue, True)
bound_train_loop = partial(loop_builder, batches_per_step, bound_train_model,
                           infeed_train_queue)
bound_test_model = partial(model, lr_placeholder, outfeed_test_queue, False)
bound_test_loop = partial(loop_builder, test_batches, bound_test_model,
                          infeed_test_queue)
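# functools.partial pre-binds the leading arguments of `model` and
# `loop_builder`, so each bound_* callable below can be handed to the IPU
# compiler without any additional inputs.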

# Use the bound builder functions to place the model on the IPU:
with scopes.ipu_scope("/device:IPU:0"):
    train_loop = ipu_compiler.compile(bound_train_loop, inputs=[])
    test_loop = ipu_compiler.compile(bound_test_loop, inputs=[])

# Initialisers should go on the CPU:
with tf.device("cpu"):
    metrics_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                     scope="metrics")
    metrics_initializer = tf.variables_initializer(var_list=metrics_vars)
    saver = tf.train.Saver()

# Setup and acquire an IPU device:
config = utils.create_ipu_config()
config = utils.auto_select_ipus(config, 1)
utils.configure_ipu_system(config)

# These allow us to retrieve the results of IPU feeds:
dequeue_train_outfeed = outfeed_train_queue.dequeue()
dequeue_test_outfeed = outfeed_test_queue.dequeue()

# Create a benchmark program for the infeed to determine maximum achievable throughput:
infeed_perf = dataset_benchmark.infeed_benchmark(infeed_train_queue, epochs,
                                                 num_train, True)