Example #1
    def init_from_orbit(self,
                        period,
                        lighttime,
                        tref=0.0,
                        eccen=1e-5,
                        varpi=0.0):
        """Initialize the parameters based on an orbit estimate

        Args:
            period: The orbital period in units of ``time``.
            lighttime: The projected light travel time in units of ``time``
                (:math:`a_1\,\sin(i)/c`).
            tref: The reference time in units of ``time``.
            eccen: The orbital eccentricity.
            varpi: The angle of the ascending node in radians.

        """
        ops = []
        ops.append(tf.assign(self.period, period))
        ops.append(
            tf.assign(self.lighttime,
                      lighttime + tf.zeros_like(self.lighttime)))
        ops.append(tf.assign(self.tref, tref))
        if self.with_eccen:
            ops.append(
                tf.assign(self.eccen_param,
                          np.log(eccen) - np.log(1.0 - eccen)))
            ops.append(tf.assign(self.varpi, varpi))
        self.run(ops)
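The ``eccen_param`` assignment above stores the eccentricity through a logit transform, so the bounded value in (0, 1) becomes an unconstrained parameter. A minimal NumPy sketch of that round trip (helper names are illustrative, not from the original code):

import numpy as np

def eccen_to_param(eccen):
    # logit: maps (0, 1) to the whole real line
    return np.log(eccen) - np.log(1.0 - eccen)

def param_to_eccen(param):
    # inverse (sigmoid): maps back into (0, 1)
    return 1.0 / (1.0 + np.exp(-param))

assert abs(param_to_eccen(eccen_to_param(0.3)) - 0.3) < 1e-12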
Example #2
    def _init_references(self):
        # print ('DeepLIFT: computing references...')
        sys.stdout.flush()
        self._deeplift_ref.clear()
        ops = []

        # Original: Full set of operations
        if self.init_ref == 'default':
            g = tf.get_default_graph()
            ops_check = g.get_operations()
        elif self.init_ref == 'custom':
            ## Custom: limit operations to those downstream of the input
            print('Custom: limit operations to those downstream of the input')
            ops_check = [descendants(op) for op in self.X.consumers()]
            ops_check = list(set(chain.from_iterable(ops_check)))

        for op in ops_check:
            if len(op.inputs) > 0 and not op.name.startswith('gradients'):
                if op.type in SUPPORTED_ACTIVATIONS:
                    ops.append(op)

        YR = self._session_run([o.inputs[0] for o in ops], self.baseline)
        for (r, op) in zip(YR, ops):
            self._deeplift_ref[op.name] = r
        # print('DeepLIFT: references ready')
        sys.stdout.flush()
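A simplified standalone sketch of the reference-collection idea, assuming TensorFlow 1.x graph mode and using plain ``Relu`` ops in place of ``SUPPORTED_ACTIVATIONS``: evaluate the graph at a baseline input and record the value flowing into each activation.

import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 4])
h = tf.nn.relu(tf.matmul(x, tf.get_variable('w1', [4, 3])))
y = tf.nn.relu(tf.matmul(h, tf.get_variable('w2', [3, 1])))

relu_ops = [op for op in tf.get_default_graph().get_operations()
            if op.type == 'Relu' and not op.name.startswith('gradients')]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    baseline = np.zeros((1, 4), dtype=np.float32)
    # reference value for each activation: its input evaluated at the baseline
    refs = dict(zip([op.name for op in relu_ops],
                    sess.run([op.inputs[0] for op in relu_ops], {x: baseline})))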
Example #3
 def states2states(self, states, to_states):
     ops = []
     for i in xrange(len(states)):
         copy_c = to_states[i].c.assign(states[i].c)
         copy_h = to_states[i].h.assign(states[i].h)
         ops.append(copy_c)
         ops.append(copy_h)
         
     return ops
Example #4
    def tranform2transform(self, top_status, top_status_transform,
                           to_statuses):
        ops = []
        copy_s = to_statuses[0].assign(top_status)
        copy_st = to_statuses[1].assign(top_status_transform)
        ops.append(copy_s)
        ops.append(copy_st)

        return ops
Example #5
 def _setup_graph(self):
     vars = tf.trainable_variables()
     ops = []
     for v in vars:
         n = v.op.name
         if not n.startswith('discrim/'):
             continue
         logger.info("Clip {}".format(n))
         ops.append(tf.assign(v, tf.clip_by_value(v, -0.01, 0.01)))
     self._op = tf.group(*ops, name='clip')
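A minimal standalone sketch of the same WGAN-style clipping pattern, assuming TensorFlow 1.x graph mode and a single hypothetical ``discrim/`` variable:

import tensorflow as tf

with tf.variable_scope('discrim'):
    w = tf.get_variable('w', initializer=tf.constant([-1.0, 0.0, 1.0]))

clip_op = tf.group(*[tf.assign(v, tf.clip_by_value(v, -0.01, 0.01))
                     for v in tf.trainable_variables()
                     if v.op.name.startswith('discrim/')])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(clip_op)
    print(sess.run(w))  # [-0.01  0.    0.01]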
Example #6
    def output_after2before(self, beam_parent):
        # beam_parent : [beam_size]
        ops = []
        for i in xrange(len(self.after_output)):
            o = self.after_output[i]
            new_o = tf.nn.embedding_lookup(o, beam_parent)
            copy_o = self.before_state[i].c.assign(new_o)
            ops.append(copy_o)

        return ops
Example #7
 def setup_as_moving_average_of(self, src_net, beta=0.99, beta_nontrainable=0.0):
     assert isinstance(src_net, Network)
     with absolute_name_scope(self.scope):
         with tf.name_scope('MovingAvg'):
             ops = []
             for name, var in self.vars.items():
                 if name in src_net.vars:
                     cur_beta = beta if name in self.trainables else beta_nontrainable
                     new_value = lerp(src_net.vars[name], var, cur_beta)
                     ops.append(var.assign(new_value))
             return tf.group(*ops)
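Example #7 above assumes a ``lerp`` helper; a single-variable sketch of the same moving-average update, assuming TensorFlow 1.x graph mode:

import tensorflow as tf

def lerp(a, b, t):
    # a + (b - a) * t, i.e. t * b + (1 - t) * a
    return a + (b - a) * t

src = tf.Variable(1.0, name='src_w')   # weight in the trained network
avg = tf.Variable(0.0, name='avg_w')   # moving-average copy
update = tf.assign(avg, lerp(src, avg, 0.99))  # avg <- 0.99 * avg + 0.01 * src

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(update))  # 0.01 after one update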
Example #8
    def states2states_shuffle(self, states, to_states, beam_parent):
        ops = []
        for i in xrange(len(states)):
            copy_c = self.state2state_shuffle(to_states[i].c, states[i].c,
                                              beam_parent)
            copy_h = self.state2state_shuffle(to_states[i].h, states[i].h,
                                              beam_parent)
            ops.append(copy_c)
            ops.append(copy_h)

        return ops
Example #9
def relaxed_distance(rx_step):
    """Distance between relaxed variables and their average."""
    res, ops, rx_done = [], [], {}
    for v in tf.trainable_variables():
        if v.name[0:2] == "RX":
            rx_name = v.op.name[v.name.find("/") + 1:]
            if rx_name not in rx_done:
                avg, dist_loss = relaxed_average(rx_name, rx_step)
                res.append(dist_loss)
                rx_done[rx_name] = avg
            ops.append(v.assign(rx_done[rx_name]))
    return tf.add_n(res), tf.group(*ops)
Example #10
def get_parents(grad, op_type):
    ops = list()
    wave = set([grad.op])
    while wave:
        new_wave = set()
        for op in wave:
            for parent_op in (t.op for t in op.inputs):
                if parent_op.type == op_type:
                    ops.append(parent_op)
                else:
                    new_wave.add(parent_op)
        wave = new_wave
    return ops
Example #11
def set_vars(var_to_value_dict):
	ops = []
	feed_dict = {}
	for var, value in var_to_value_dict.items():
		assert is_tf_expression(var)
		try:
			setter = tf.get_default_graph().get_tensor_by_name(var.name.replace(':0', '/setter:0')) # look for existing op
		except KeyError:
			with absolute_name_scope(var.name.split(':')[0]):
				with tf.control_dependencies(None): # ignore surrounding control_dependencies
					setter = tf.assign(var, tf.placeholder(var.dtype, var.shape, 'new_value'), name='setter') # create new setter
		ops.append(setter)
		feed_dict[setter.op.inputs[1]] = value
	run(ops, feed_dict)
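A minimal sketch of the cached placeholder-plus-assign pattern that ``set_vars`` looks up or creates, assuming TensorFlow 1.x graph mode; building the ``setter`` once avoids adding a new ``tf.assign`` node to the graph on every update:

import tensorflow as tf

v = tf.Variable(tf.zeros([2]), name='v')
new_value = tf.placeholder(v.dtype.base_dtype, v.shape, name='new_value')
setter = tf.assign(v, new_value, name='setter')  # built once, reused for every update

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(setter, feed_dict={new_value: [1.0, 2.0]})
    print(sess.run(v))  # [1. 2.]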
Example #12
 def after2before(self, beam_parent):
     # beam_parent : [beam_size]
     ops = []
     for i in xrange(len(self.after_state)):
         c = self.after_state[i].c
         h = self.after_state[i].h
         new_c = tf.nn.embedding_lookup(c, beam_parent)
         new_h = tf.nn.embedding_lookup(h, beam_parent)
         copy_c = self.before_state[i].c.assign(new_c)
         copy_h = self.before_state[i].h.assign(new_h)
         ops.append(copy_c)
         ops.append(copy_h)
         
     return ops
Example #13
 def check(self, *args, **keys):
   if not self.frozen:
     heartbeat()
   ops = []
   seen = set()
   for name, action in self.commands:
     full = self.full_path(name)
     if not os.path.isdir(full):
       if name not in seen:
         seen.add(name)
         ops.append(name)
   for op in ops:
     self.run(op, *args, **keys)
   return ops
Example #14
 def restore(self, sess, path, index):
   ops = []
   feed = {}
   for k, net in self._nets.items():
     filename = os.path.join(path, "{}.l2l-{}".format(k, index))
     data = pickle.load(open(filename, "rb"))
     vars = snt.get_variables_in_module(net)
     for v in vars:
       split = v.name.split(":")[0].split("/")
       module_name = split[-2]
       variable_name = split[-1]
       feed[self.restore_pl[k][module_name][variable_name]] = data[module_name][variable_name]
       ops.append(self.assigns[k][module_name][variable_name])
   sess.run(ops, feed_dict=feed)
Example #15
def sync_variables_op(mpi_rank, num_comms=2, prereduce=0):
    ops = list()
    prev = []
    with tf.device("/gpu:0"):
        for var in tf.trainable_variables():
            with tf.control_dependencies(prev):
                op = tf.assign(
                    var,
                    allreduce(var if mpi_rank == 0 else var * 0.0,
                              num_comms=num_comms,
                              prereduce=prereduce))
            prev = [op]
            ops.append(op)

    return tf.group(*ops)
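A minimal sketch of the control-dependency chaining used above, assuming TensorFlow 1.x graph mode: each op waits on the previous one, so all the assigns execute in a fixed order within a single ``session.run`` call.

import tensorflow as tf

v = tf.Variable(0.0)
prev = []
ops = []
for _ in range(3):
    with tf.control_dependencies(prev):
        op = tf.assign_add(v, 1.0)  # must wait for the previous increment
    prev = [op]
    ops.append(op)
chained = tf.group(*ops)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(chained)
    print(sess.run(v))  # 3.0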
Example #16
 def _init_references(self):
     # print ('DeepLIFT: computing references...')
     sys.stdout.flush()
     self._deeplift_ref.clear()
     ops = []
     g = tf.get_default_graph()
     for op in g.get_operations():
         if len(op.inputs) > 0 and not op.name.startswith('gradients'):
             if op.type in SUPPORTED_ACTIVATIONS:
                 ops.append(op)
     YR = self.session_run([o.inputs[0] for o in ops], self.baseline)
     for (r, op) in zip(YR, ops):
         self._deeplift_ref[op.name] = r
     # print('DeepLIFT: references ready')
     sys.stdout.flush()
Example #17
def ft_optimizer_list(cost, opt_vars, optimizer, lrs, grad_clip=False):
    """Efficient optimization for fine tuning a net."""
    ops = []
    gvs = []
    for v, l in zip(opt_vars, lrs):
        if grad_clip:
            optim = optimizer(l)
            gvs = optim.compute_gradients(cost, var_list=v)
            capped_gvs = [(tf.clip_by_norm(grad, 10.),
                           var) if grad is not None else (grad, var)
                          for grad, var in gvs]
            ops.append(optim.apply_gradients(capped_gvs))
        else:
            ops.append(optimizer(l).minimize(cost, var_list=v))
    return tf.group(*ops), gvs
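Hypothetical usage of ``ft_optimizer_list``, assuming ``loss`` is a scalar tensor and ``base_vars``/``head_vars`` are lists of trainable variables; the pretrained backbone gets a much smaller learning rate than the new head:

train_op, _ = ft_optimizer_list(
    loss,
    [base_vars, head_vars],
    tf.train.AdamOptimizer,
    lrs=[1e-5, 1e-3])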
Example #18
 def _init_references(self):
     # print ('DeepLIFT: computing references...')
     sys.stdout.flush()
     self._deeplift_ref.clear()
     ops = []
     g = tf.get_default_graph()
     for op in g.get_operations():
         if len(op.inputs) > 0 and not op.name.startswith('gradients'):
             if op.type in SUPPORTED_ACTIVATIONS:
                 ops.append(op)
     for op in ops:
         r = self.session_run(op.inputs[0], self.train_x[0:0 + 1, :])
         for i in range(1, self.train_x.shape[0]):
             r += self.session_run(op.inputs[0], self.train_x[i:i + 1, :])
         self._deeplift_ref[op.name] = r / self.train_x.shape[0]
     # print('DeepLIFT: references ready')
     sys.stdout.flush()
Example #19
 def apply_gradients(self, grads_and_vars, global_step=None, name=None):
     ops = []
     if not self.stage_weights:
         return tf.train.MomentumOptimizer(
             self.lr,
                momentum=self.momentum).apply_gradients(grads_and_vars, name=name)
     for stage, weights in self.stage_weights.items():
         lr_decay = self.stage_lr_decay[stage]
         mom_decay = self.stage_mom_decay[stage]
         lr = self.lr * lr_decay
         mom = self.momentum * mom_decay
         grads_and_vars_opt = [(g, v) for g, v in grads_and_vars
                               if v.name in weights]
         ops.append(
             tf.train.MomentumOptimizer(lr, momentum=mom).apply_gradients(
                grads_and_vars_opt, name=name))
        return tf.group(*ops)
Example #20
def cast_variables(variables, graph=None, cache_ops=None):
  if graph is None:
    graph = get_default_graph()
  if cache_ops is None:
    cache_ops = state.cache_ops
  if graph not in cache_ops:
    cache_ops[graph] = {}
  cache = cache_ops[graph]
  ops = []
  for variable in variables:
    if variable in cache:
      op = cache[variable]
    elif variable.dtype == dtypes.bfloat16_ref or variable.dtype == tf.bfloat16:
      op = tf.cast(variable, tf.float32)
    else:
      op = variable
    cache[variable] = op
    ops.append(op)
  return ops
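Hypothetical usage of ``cast_variables``, passing an explicit cache dict so repeated calls return the same cast ops (module-level imports from the original file are assumed):

cache = {}
float_reads = cast_variables(tf.trainable_variables(),
                             graph=tf.get_default_graph(),
                             cache_ops=cache)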
Example #21
def get_parents(grad, op_type):
    if grad.op.type == op_type:
        return [grad.op]
    ops  = list()
    wave = set([grad.op])
    while wave:
        new_wave = set()
        for op in wave:
            # print(op.name)
            # for i in op.inputs:
            #     print("   ", i.name)
            # print()
            for parent_op in (t.op for t in op.inputs):
                if parent_op.type == op_type:
                    ops.append(parent_op)
                else:
                    new_wave.add(parent_op)
        wave = new_wave
    return ops
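A small usage sketch for ``get_parents``, assuming TensorFlow 1.x graph mode: build a toy graph, take a gradient, and walk back from it to the ``MatMul`` ops that produced it.

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 4])
w = tf.get_variable('w', [4, 2])
loss = tf.reduce_sum(tf.nn.relu(tf.matmul(x, w)))
(grad,) = tf.gradients(loss, [w])

matmuls = get_parents(grad, 'MatMul')
print([op.name for op in matmuls])  # e.g. ['gradients/MatMul_grad/MatMul_1']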
Example #22
    def _init_references(self):
        sys.stdout.flush()
        self._deeplift_ref.clear()
        ops = []
        g = self.session.graph
        # get subgraph starting from the target node down
        subgraph = tf.graph_util.extract_sub_graph(g.as_graph_def(), [self.T.name.split(':')[0]])

        for n in subgraph.node:
            op = g.get_operation_by_name(n.name)
            if len(op.inputs) > 0 and not op.name.startswith('gradients'):
                if op.type in SUPPORTED_ACTIVATIONS:
                    ops.append(op)
                    print(op.name)

        ins = [o.inputs[0] for o in ops]
        print('ins', ins)
        YR = self.session_run(ins, self.baseline)
        for (r, op) in zip(YR, ops):
            self._deeplift_ref[op.name] = r
        sys.stdout.flush()
Example #23
def sync_globals_zero_init_op(num_comms=2, prereduce=0):
    ops = list()
    prev = []
    with tf.device("/gpu:0"):
        for var in tf.global_variables():
            if var.dtype.base_dtype not in [tf.float32, tf.float16]:
                cast_back = True
                to_reduce = tf.cast(var, tf.float32)
            else:
                to_reduce = var
                cast_back = False
            with tf.control_dependencies(prev):
                reduced = allreduce(to_reduce,
                                    num_comms=num_comms,
                                    prereduce=prereduce)
            if cast_back:
                reduced = tf.cast(reduced, var.dtype.base_dtype)
            op = tf.assign(var, reduced)
            prev = [op]
            ops.append(op)

    return tf.group(*ops)
Example #24
    def _apply_dense(self, grad, var):
        var_dtype = var.dtype.base_dtype

        lr = math_ops.cast(self._lr_t, var_dtype)
        beta = self._beta
        epsilon = self._epsilon
        t = math_ops.cast(self.iterations + 1, var_dtype)

        ops = []

        # Update running sum
        s = self.get_slot(var, 'sum')
        grad_sq = math_ops.square(grad)
        s_new = s + grad_sq
        ops.append(state_ops.assign(s, s_new, use_locking=self._use_locking))

        # Update running counter
        if self._sparse_counter:
            n = self.get_slot(var, 'counter')
            n_new = n + math_ops.sign(grad_sq)
            ops.append(
                state_ops.assign(n, n_new, use_locking=self._use_locking))
        else:
            # Counter is not sparse; just use the current timestep instead
            n_new = t

        # Compute step size
        average = math_ops.div_no_nan(s_new, n_new)
        step = grad / (epsilon + math_ops.sqrt(average))

        # Update momentum
        if self._use_momentum:
            m = self.get_slot(var, 'momentum')
            m_new = beta * m + (1.0 - beta) * step
            ops.append(
                state_ops.assign(m, m_new, use_locking=self._use_locking))
            # Bias correction
            lr = lr / (1.0 - pow(beta, t))
        else:
            # No momentum; just use the current step instead
            m_new = step

        # Update parameters
        ops.append(
            state_ops.assign_sub(var,
                                 lr * m_new,
                                 use_locking=self._use_locking))
        return control_flow_ops.group(*ops)
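For reference, a NumPy sketch of the same per-step update in the dense-counter, momentum case; names and default values here are illustrative rather than taken from the optimizer above:

import numpy as np

def step_update(var, grad, s, m, t, lr=1e-3, beta=0.9, eps=1e-8):
    s = s + grad ** 2                    # running sum of squared gradients
    avg = s / t                          # dense counter: n equals the timestep
    step = grad / (eps + np.sqrt(avg))   # normalized step
    m = beta * m + (1.0 - beta) * step   # momentum
    lr_t = lr / (1.0 - beta ** t)        # bias correction
    return var - lr_t * m, s, m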
Example #25
 def hatt_after2before(self, beam_parent):
     ops = []
     new_h_att = tf.nn.embedding_lookup(self.after_h_att, beam_parent)
     copy_op = self.before_h_att.assign(new_h_att)
     ops.append(copy_op)
     return ops
Example #26
def group_lstm_grads(grads, params, scope="grouped_lstm", group_size=None):

    grad = None
    grad_idx = None
    for i, (g, p) in enumerate(zip(grads, params)):
        if scope in p.name and "kernel" in p.name:
            grad = g
            grad_idx = i
            break
    assert grad is not None

    # backward walk param grad to find dw MatMul ops
    # walk should terminate with each MatMul op
    ops = list()
    wave = set([grad.op])
    while wave:
        new_wave = set()
        for op in wave:
            for parent_op in (t.op for t in op.inputs):
                # TN MatMul ops (transpose_a=True, transpose_b=False)
                if (parent_op.type == "MatMul" and parent_op.get_attr("transpose_a")
                        and not parent_op.get_attr("transpose_b")):
                    ops.append(parent_op)
                else:
                    new_wave.add(parent_op)
        wave = new_wave

    # sort op names descending and split out the lstms (if weights are shared)
    last_lstm = None
    lstms = list()
    ops.sort(key=lambda op: op.name, reverse=True)
    for op in ops:
        # gradients/grouped_lstm/lstm_2/step_00_grad/MatMul_1 => lstm_2
        lstm = op.name.split("/")[-3]
        if last_lstm != lstm:
            lstms.insert(0, list())
            last_lstm = lstm
        lstms[0].append(op)

    # we're going to be using absolute names, so clear name_scope
    with tf.name_scope(None):

        lstm_grads = list()
        for lstm_ops in lstms:

            # default dw op to one big matmul per lstm
            if group_size is None:
                group_size = len(lstm_ops)

            # use the lstm scope for the new ops
            # gradients/grouped_lstm/lstm_2/step_00_grad/MatMul_1 => gradients/grouped_lstm/lstm_2
            scope = lstm_ops[-1].name.split('/')
            scope = '/'.join(scope[0:-2])

            offset = 0
            while offset < len(lstm_ops):

                xs = tf.concat(
                    [op.inputs[0] for op in lstm_ops[offset:offset + group_size]],
                    axis=0)
                gs = tf.concat(
                    [op.inputs[1] for op in lstm_ops[offset:offset + group_size]],
                    axis=0)

                mmop = tf.matmul(xs,
                                 gs,
                                 transpose_a=True,
                                 transpose_b=False,
                                 name="%s/dw_%04d" % (scope, offset))
                grad = mmop if offset == 0 else ew.add(
                    grad, mmop, name="%s/add_%04d" % (scope, offset))

                offset += group_size

            lstm_grads.append(grad)

        if len(lstms) > 1:
            from blocksparse.ewops import add_n
            # gradients/grouped_lstm/lstm_2/step_00_grad/MatMul_1 => gradients/grouped_lstm
            scope = lstms[0][-1].name.split('/')
            scope = '/'.join(scope[0:-3])
            grads[grad_idx] = tf.add_n(lstm_grads, name="%s/add_n" % scope)
        else:
            grads[grad_idx] = lstm_grads[0]

    #grads modified in place


# lstm_scopes = dict()
# # ridiculous amount of code just to be able to re-enter a variable scope without its name being re-numbered.
# # https://github.com/tensorflow/tensorflow/pull/14390
# global lstm_scopes
# if scope not in lstm_scopes:
#     with tf.variable_scope(scope) as lstm_scope:
#         lstm_scopes[scope] = lstm_scope
# lstm_scope = lstm_scopes[scope]

# with tf.variable_scope(lstm_scope, auxiliary_name_scope=False), tf.name_scope(lstm_scope.original_name_scope):
#     with tf.variable_scope(weights_scope, reuse=weights_reuse):
#         w = tf.get_variable('kernel', shape=[in_width + width, 4 * width])
#         if bias_scope is None:
#             b = tf.get_variable('bias', shape=[4 * width])
#             if layernorm:
#                 g = tf.get_variable('gain', shape=[4 * width])

#     if bias_scope is not None:
#         with tf.variable_scope(bias_scope, reuse=bias_reuse):
#             b = tf.get_variable('bias', shape=[4 * width])
#             if layernorm:
#                 g = tf.get_variable('gain', shape=[4 * width])
Example #27
    def output2output(self, output, to_output):
        ops = []
        copy_o = to_output.assign(output)
        ops.append(copy_o)

        return ops
Example #28
    def apply_updates(self):
        assert not self._updates_applied
        self._updates_applied = True
        devices = list(self._dev_grads.keys())
        total_grads = sum(len(grads) for grads in self._dev_grads.values())
        assert len(devices) >= 1 and total_grads >= 1
        ops = []
        with absolute_name_scope(self.scope):

            # Cast gradients to FP32 and calculate partial sum within each device.
            dev_grads = OrderedDict()  # device => [(grad, var), ...]
            for dev_idx, dev in enumerate(devices):
                with tf.name_scope('ProcessGrads%d' % dev_idx), tf.device(dev):
                    sums = []
                    for gv in zip(*self._dev_grads[dev]):
                        assert all(v is gv[0][1] for g, v in gv)
                        g = [tf.cast(g, tf.float32) for g, v in gv]
                        g = g[0] if len(g) == 1 else tf.add_n(g)
                        sums.append((g, gv[0][1]))
                    dev_grads[dev] = sums

            # Sum gradients across devices.
            if len(devices) > 1:
                with tf.name_scope('SumAcrossGPUs'), tf.device(None):
                    for var_idx, grad_shape in enumerate(self._grad_shapes):
                        g = [dev_grads[dev][var_idx][0] for dev in devices]
                        if np.prod(grad_shape):  # nccl does not support zero-sized tensors
                            g = tf.contrib.nccl.all_sum(g)
                        for dev, gg in zip(devices, g):
                            dev_grads[dev][var_idx] = (
                                gg, dev_grads[dev][var_idx][1])

            # Apply updates separately on each device.
            for dev_idx, (dev, grads) in enumerate(dev_grads.items()):
                with tf.name_scope('ApplyGrads%d' % dev_idx), tf.device(dev):

                    # Scale gradients as needed.
                    if self.use_loss_scaling or total_grads > 1:
                        with tf.name_scope('Scale'):
                            coef = tf.constant(np.float32(1.0 / total_grads),
                                               name='coef')
                            coef = self.undo_loss_scaling(coef)
                            grads = [(g * coef, v) for g, v in grads]

                    # Check for overflows.
                    with tf.name_scope('CheckOverflow'):
                        grad_ok = tf.reduce_all(
                            tf.stack([
                                tf.reduce_all(tf.is_finite(g))
                                for g, v in grads
                            ]))

                    # Update weights and adjust loss scaling.
                    with tf.name_scope('UpdateWeights'):
                        opt = self._dev_opt[dev]
                        ls_var = self.get_loss_scaling_var(dev)
                        if not self.use_loss_scaling:
                            ops.append(
                                tf.cond(grad_ok,
                                        lambda: opt.apply_gradients(grads),
                                        tf.no_op))
                        else:
                            ops.append(
                                tf.cond(
                                    grad_ok,
                                    lambda: tf.group(
                                        tf.assign_add(ls_var, self.loss_scaling_inc),
                                        opt.apply_gradients(grads)),
                                    lambda: tf.group(
                                        tf.assign_sub(ls_var, self.loss_scaling_dec))))

                    # Report statistics on the last device.
                    if dev == devices[-1]:
                        with tf.name_scope('Statistics'):
                            ops.append(
                                autosummary(self.id + '/learning_rate',
                                            self.learning_rate))
                            ops.append(
                                autosummary(self.id + '/overflow_frequency',
                                            tf.where(grad_ok, 0, 1)))
                            if self.use_loss_scaling:
                                ops.append(
                                    autosummary(self.id + '/loss_scaling_log2',
                                                ls_var))

            # Initialize variables and group everything into a single op.
            self.reset_optimizer_state()
            init_uninited_vars(list(self._dev_ls_var.values()))
            return tf.group(*ops, name='TrainingOp')
Example #29
def assign_vars(distribution, variables, values):
    ops = []
    for variable, value in zip(variables, values):
        ops.append(variable.assign(value))
    return ops
Example #30
  context = create_test_xla_compile_context()
  context.Enter()
  o = a.assign(2)
  context.Exit()
  return o


op = lambda x: tpu_ops.tpu_ops.collective_permute(x, [[0, 1], [1, 0], [2, 3], [3, 2], [4, 5], [5, 4], [6, 7], [7, 6]])
zz = tpu_ops.shard(op, outputs_from_all_shards=True, num_shards=8, inputs=[[tf.constant([x+1], dtype=tf.float32) for x in range(8)]]); sess.run(zz)


ops = []
for core in range(8):
  for step in range(8):
    with tf.control_dependencies(ops):
      ops.append(tpu_ops.tpu_ops.infeed_enqueue([tf.constant(step, tf.float32)], shape=[1], device_ordinal=core))


topology = tpu_topology
topology_rank = len(topology.mesh_shape)
mesh_shape = topology.mesh_shape

computation_shape = None
computation_stride = None
num_replicas = 3



if computation_shape is None:
  computation_shape = np.array([1] * topology_rank, dtype=np.int32)
else: