Exemple #1
0
def get_run_op():
    """Build one gradient tower per replica and aggregate their gradients.

    With a single worker, ``FLAGS.phy_blocks`` towers are all placed on
    ``/gpu:0``; otherwise one tower is placed on each of ``/gpu:0`` ..
    ``/gpu:<FLAGS.num_workers - 1>``.

    Returns:
        The result of ``aggregrate_gradients`` for the per-tower gradients
        and the device list.
    """
    # Queried unconditionally, even though the single-worker path replaces
    # the result just below.
    devices = get_all_devices()
    if FLAGS.num_workers == 1:
        devices = ['/gpu:0'] * FLAGS.phy_blocks
        tower_devices = devices
    else:
        tower_devices = ['/gpu:%d' % i for i in range(FLAGS.num_workers)]

    tower_grads = []
    for i, device in enumerate(tower_devices):
        with tf.device(device):
            data, labels = fake_data(FLAGS.batch_size, 1)
            weights = initialize_weights()
            logit = inference(data, weights)
            _loss = loss(logit, labels)
            # Each tower gets its own uniquely named optimizer.
            opt = tf.train.GradientDescentOptimizer(learning_rate=0.5,
                                                    name=("opt%d" % i))
            tower_grads.append(opt.compute_gradients(_loss, weights))
    return aggregrate_gradients(tower_grads, devices)
Exemple #2
0
def get_run_op():
  """Build a stack of FLAGS.num_layers matmul layers spread over GPUs.

  Every layer multiplies FLAGS.num_cuts inputs by its own weight matrix;
  inside a layer the matmuls are chained with control dependencies so that
  each one waits for the previous one.

  Returns:
    (init_op, train_op): the variable-initialisation op and a no-op that
    depends on every output of the last layer.
  """
  # Optimizer creation is currently disabled:
  #opt = tf.train.GradientDescentOptimizer(learning_rate=0.01)
  # NOTE(review): this relies on `/` producing an integer (Python 2
  # semantics, consistent with the xrange calls below) - confirm.
  slice_size = FLAGS.batch_size / FLAGS.num_cuts
  print('Slice size:{}'.format(slice_size))
  data = None
  label = None  # never assigned again in the live code below
  # Index 0 holds a placeholder no-op so that input k lives at index k+1.
  last_fc = [tf.no_op()]
  with tf.device('/gpu:0'):
    data = tf.get_variable(
        name = 'data',
        shape=[slice_size, FLAGS.hidden_size],
        trainable=False)
    '''
    label = tf.get_variable(
        name = 'label',
        shape = [slice_size, FLAGS.hidden_size],
        trainable=False))
    with tf.variable_scope('fc_in'):
      weight_in = tf.zeros([1000, FLAGS.hidden_size])
      for k in xrange(FLAGS.num_cuts):
        with tf.control_dependencies([last_fc[-1]]):
            last_fc.append(tf.matmul(data[k+1], weight_in))
    '''
  # Every cut feeds the same `data` variable into the first layer.
  for i in xrange(FLAGS.num_cuts):
    last_fc.append(data)
  for i in xrange(FLAGS.num_layers):
    # Map layer i onto a GPU index proportional to its position in the stack.
    dev = '/gpu:%d' % (i * FLAGS.num_gpus / FLAGS.num_layers)
    with tf.device(dev), scopes.arg_scope([variables.variable], device=dev):
      tmp_fc = [tf.no_op()]
      with tf.variable_scope('fc%d' % i):
        w = tf.get_variable(
            name='w',
            shape=[FLAGS.hidden_size, FLAGS.hidden_size],
            trainable=True)
        for k in xrange(FLAGS.num_cuts):
          # Serialise the matmuls of this layer: each waits for the last
          # op appended to tmp_fc.
          with tf.control_dependencies([tmp_fc[-1]]):
            tmp_fc.append(tf.matmul(last_fc[k+1], w))
      last_fc = tmp_fc
      # NOTE(review): train_op is only bound on the final iteration; with
      # FLAGS.num_layers == 0 the return statement would raise NameError.
      if i == FLAGS.num_layers - 1:
        with tf.control_dependencies(last_fc):
          train_op = tf.no_op()
  '''
  with tf.device('/gpu:%d' % (FLAGS.num_gpus - 1)):
    tmp_fc = [tf.no_op()]
    with tf.variable_scope('fc_out'):
      weight_out = tf.zeros([FLAGS.hidden_size, 1000])
      for k in xrange(FLAGS.num_cuts):
        with tf.control_dependencies([tmp_fc[-1]]):
          tmp_fc.append(tf.matmul(last_fc[k+1], weight_out))
    last_fc = tmp_fc
  loss = tf.nn_softmax_cross_entropy_with_logits(last_fc, labels, name='xentropy')
  grads = opt.compute_gradients(loss)
  apply_gradient_op = opt.apply_gradients(grads)

  train_op = tf.group(apply_gradient_op)
  '''
  init_op = tf.initialize_all_variables()

  return init_op, train_op
Exemple #3
0
 def __init__(self, planes, args, phase=1, filters=192, board_size=15, model_dir="./value_net_models",
              model_file=None,
              device="gpu", gpu=1, optimizer="sgd", learn_rate=1e-6, distributed_train=False):
     """Create the value network, restore the step counter and attach the loss.

     Args:
         planes: forwarded to ``AI_net.create_value_network`` and stored.
         args: object providing ``values_net_batch_size`` and, for
             distributed training, ``task_index`` and ``gpu_id``.
         phase: stored on the instance.
         filters: forwarded to the network constructor.
         board_size: stored and forwarded to the network constructor.
         model_dir: forwarded to ``AI_net.SuperNetwork.__init__``.
         model_file: accepted but not used by this constructor.
         device: ``"cpu"`` places the worker on ``/cpu:0``; any other
             value selects ``/gpu:<gpu>`` (non-distributed mode only).
         gpu: GPU index used in non-distributed mode.
         optimizer: forwarded to ``self.loss_function``.
         learn_rate: forwarded to ``self.loss_function``.
         distributed_train: when true, use the ps/worker job devices.
     """
     self.board_size = board_size
     self.phase = phase
     self.planes = planes

     # Choose where parameters live and where the computation runs.
     if distributed_train:
         param_dev = "/job:ps/task:0/cpu:0"
         compute_dev = "/job:worker/task:%d/gpu:%d" % (args.task_index, args.gpu_id)
     elif device == "cpu":
         param_dev = "/cpu:0"
         compute_dev = "/cpu:0"
     else:
         param_dev = "/cpu:0"
         compute_dev = "/gpu:%d" % gpu

     # Build the network and keep its input/output handles.
     self.tf_var = {}
     net_in, net_out = AI_net.create_value_network(
         planes, param_dev, compute_dev, filters=filters, board_size=self.board_size, name_prefix="value_net")
     self.tf_var["in"] = net_in
     self.tf_var["out"] = net_out

     # The base-class initialiser runs only after the graph exists.
     AI_net.SuperNetwork.__init__(self, model_dir=model_dir)

     # Resume the step counter from stored parameters (0 when none exist).
     restored = self.param_unserierlize(init_params={"global_step": 0})
     start_step = int(restored["global_step"])
     with tf.device(param_dev):
         self.global_step = tf.Variable(start_step)

     with tf.device(compute_dev):
         self.loss_function(optimizer, learn_rate, args.values_net_batch_size)
  def __init__(
      self,
      remote_device,
      local_device,
      top_delta_size=64,
      top_delta_layers=2,
      compute_h_size=64,
      compute_h_layers=1,
      delta_dim=32,
      num_grad_channels=4,
      normalize_epsilon=1.,
  ):
    """Store the hyper-parameters, create the optimizer and build the module.

    Args:
      remote_device: device scope under which the module is invoked once at
        the end of construction.
      local_device: device scope under which the optimizer is created.
      top_delta_size: stored as ``self.top_delta_size``.
      top_delta_layers: stored as ``self.top_delta_layers``.
      compute_h_size: stored as ``self.compute_h_size``.
      compute_h_layers: stored as ``self.compute_h_layers``.
      delta_dim: stored as ``self.delta_dim``.
      num_grad_channels: stored as ``self.num_grad_channels``.
      normalize_epsilon: stored as ``self.normalize_epsilon`` (a scalar).
    """
    self.local_device = local_device
    self.remote_device = remote_device
    self.top_delta_size = top_delta_size
    self.top_delta_layers = top_delta_layers
    self.compute_h_size = compute_h_size
    self.compute_h_layers = compute_h_layers
    self.delta_dim = delta_dim
    self.num_grad_channels = num_grad_channels
    # Stored as the plain value: a stray trailing comma here would wrap it
    # in a one-element tuple.
    self.normalize_epsilon = normalize_epsilon

    with tf.device(local_device):
      self.opt = optimizers.UnrollableGradientDescentRollingOptimizer(
          learning_rate=1e-4)

    # lazily initialized for readouts
    self.readout_mods = {}

    super(MoreLocalWeightUpdateProcess,
          self).__init__(name='MoreLocalWeightUpdateProcess')

    # Invoke the module once under the remote device scope.
    with tf.device(remote_device):
      self()
    def __init__(self, session, np_matrix, rank,
                 learning_rate=0.1):
        """Set up a rank-``rank`` factorisation ``W * H`` of ``np_matrix``.

        ``W`` is created under ``/job:ps/task:0`` and ``H`` under
        ``/job:ps/task:1``. The loss is the squared reconstruction error
        plus a penalty, scaled by ``INFINITY``, on negative entries.

        Args:
            session: stored on the instance as ``self.session``.
            np_matrix: array to factorise; its mean and shape are read.
            rank: inner dimension shared by ``W`` and ``H``.
            learning_rate: step size for the gradient descent optimizer.
        """
        target = tf.constant(np_matrix, dtype=tf.float32)
        upper = 2 * np.sqrt(np_matrix.mean() / rank)
        init = tf.random_uniform_initializer(maxval=upper)

        w_shape = (np_matrix.shape[0], rank)
        with tf.device('/job:ps/task:0'):
            self.matrix_W = tf.get_variable("W", w_shape, initializer=init)

        h_shape = (rank, np_matrix.shape[1])
        with tf.device("/job:ps/task:1"):
            self.matrix_H = tf.get_variable("H", h_shape, initializer=init)

        # Squared reconstruction error of the product.
        product = tf.matmul(self.matrix_W, self.matrix_H)
        residual = target - product
        f_norm = tf.reduce_sum(tf.pow(residual, 2))

        # |x| - x is zero for non-negative x, so these sums only grow when
        # W or H contain negative entries.
        negativity_w = tf.reduce_sum(tf.abs(self.matrix_W) - self.matrix_W)
        negativity_h = tf.reduce_sum(tf.abs(self.matrix_H) - self.matrix_H)
        penalty = INFINITY * (negativity_w + negativity_h)

        self.loss = f_norm + penalty
        self.constraint = penalty
        self.session = session

        descent = tf.train.GradientDescentOptimizer(learning_rate)
        self.optimizer = descent.minimize(self.loss)
  def testHandleDeletion(self):
    """Repeatedly replace a session handle with a GPU-computed sum.

    Returns True immediately when CUDA support or GPU 0 is unavailable.
    """
    if not (tf.test.is_built_with_cuda() and self.haveGpu0()):
      return True

    float_type = tf.float32
    session = tf.Session(config=tf.ConfigProto(log_device_placement=True))

    # The starting values are produced on the CPU.
    with tf.device("/cpu:0"):
      one = tf.constant(1, dtype=float_type)
      one_handle = session.run(tf.get_session_handle(one))
      running_handle = session.run(tf.get_session_handle(one))

    # The addition itself is placed on the GPU.
    with tf.device("/gpu:0"):
      lhs_holder, lhs_tensor = tf.get_session_tensor(one_handle.handle,
                                                     float_type)
      rhs_holder, rhs_tensor = tf.get_session_tensor(one_handle.handle,
                                                     float_type)
      sum_handle_op = tf.get_session_handle(tf.add(lhs_tensor, rhs_tensor))

    # Add one twenty times, overwriting the running handle each time; the
    # original author chose 20 to exceed _DEAD_HANDLES_THRESHOLD.
    for _ in range(20):
      feeds = {lhs_holder: one_handle.handle,
               rhs_holder: running_handle.handle}
      running_handle = session.run(sum_handle_op, feed_dict=feeds)
 def _apply_drop_path(self, net):
   """Apply drop_path to ``net`` with a depth- and step-scaled keep prob.

   Returns ``net`` unchanged unless the configured keep probability is
   below 1.0.
   """
   keep_prob = self._drop_path_keep_prob
   if not keep_prob < 1.0:
     return net

   # Scale the drop probability by how deep this cell sits in the stack.
   assert self._cell_num != -1
   total_cells = self._total_num_cells
   layer_ratio = (self._cell_num + 1)/float(total_cells)
   with tf.device('/cpu:0'):
     tf.summary.scalar('layer_ratio', layer_ratio)
   keep_prob = 1 - layer_ratio * (1 - keep_prob)

   # Ramp the drop probability up with training progress, capped at 1.
   step = tf.cast(tf.train.get_or_create_global_step(), tf.float32)
   burn_in_steps = self._total_training_steps
   progress = tf.minimum(1.0, step / burn_in_steps)
   with tf.device('/cpu:0'):
     tf.summary.scalar('current_ratio', progress)
   keep_prob = 1 - progress * (1 - keep_prob)
   with tf.device('/cpu:0'):
     tf.summary.scalar('drop_path_keep_prob', keep_prob)

   return drop_path(net, keep_prob)
Exemple #8
0
 def testColocation(self):
   """Check the device and colocation group of an assign op.

   The variable is created under "/job:ps" and the assign is built under a
   worker device scope; the assign is expected to report "/job:ps".
   """
   ps_device = "/job:ps"
   with tf.device(ps_device):
     variable = tf.Variable(0, name="v")
   with tf.device("/job:worker/task:7"):
     assign = variable.assign(1)

   self.assertDeviceEqual(ps_device, assign.device)
   expected_groups = [b"loc:@v"]
   self.assertEqual(expected_groups, assign.op.colocation_groups())
  def testReturnsSingleCheckpointIfOneShardedCheckpoint(self):
    """Save one sharded checkpoint and expect the iterator to yield once."""
    checkpoint_dir = os.path.join(self.get_temp_dir(),
                                  'one_checkpoint_found_sharded')
    if not tf.gfile.Exists(checkpoint_dir):
      tf.gfile.MakeDirs(checkpoint_dir)

    global_step = tf.contrib.framework.get_or_create_global_step()

    # Pin one variable to each of two CPU devices before building the
    # sharded saver.
    for index, value in enumerate((10, 20)):
      with tf.device('/cpu:%d' % index):
        tf.Variable(value, name='v%d' % index)

    saver = tf.train.Saver(sharded=True)

    session_config = tf.ConfigProto(device_count={'CPU': 2})
    with tf.Session(target='', config=session_config) as session:
      session.run(tf.initialize_all_variables())
      saver.save(session,
                 os.path.join(checkpoint_dir, 'model.ckpt'),
                 global_step=global_step)

    # timeout=0: the iterator is not asked to wait for further checkpoints.
    found = sum(1 for _ in tf.contrib.training.checkpoints_iterator(
        checkpoint_dir, timeout=0))
    self.assertEqual(found, 1)
  def _model_fn(features, labels, mode, params):
    """Build the graph for the model selected by FLAGS.model.

    In TRAIN mode with FLAGS.num_gpus > 0, features and labels are split
    into FLAGS.num_gpus parts, one tower is built per GPU with shared
    variables, and the per-tower gradients are averaged. Otherwise the
    model is built once on GPU 0.

    Args:
      features: mapping of name -> tensor (``.iteritems()`` is called on it
        in the multi-GPU branch).
      labels: mapping of name -> tensor (same requirement).
      mode: a ``learn.ModeKeys`` value.
      params: forwarded to the model; ``optimizer``, ``learning_rate`` and
        ``decay_steps`` are read from it when training.

    Returns:
      A ``tf.contrib.learn.ModelFnOps`` with predictions, loss and train_op.
    """
    model_fn = MODELS[FLAGS.model].model_fn

    global_step = tf.train.get_or_create_global_step()

    if FLAGS.num_gpus > 0 and mode == learn.ModeKeys.TRAIN:
      # NOTE: .iteritems() is the Python 2 dict API.
      split_features = {k: tf.split(v, FLAGS.num_gpus)
                        for k, v in features.iteritems()}
      split_labels = {k: tf.split(v, FLAGS.num_gpus)
                      for k, v in labels.iteritems()}
      grads = []
      predictions = collections.defaultdict(list)
      losses = []

      opt = ops.create_optimizer(
        params.optimizer, params.learning_rate, params.decay_steps)

      for i in range(FLAGS.num_gpus):
        with tf.device(tf.DeviceSpec(device_type='GPU', device_index=i)):
          with tf.name_scope('tower_%d' % i):
            # Towers after the first reuse the variables of the first one.
            with tf.variable_scope(tf.get_variable_scope(), reuse=i > 0):
              device_features = {k: v[i] for k, v in split_features.iteritems()}
              device_labels = {k: v[i] for k, v in split_labels.iteritems()}

              device_predictions, device_loss = model_fn(
                device_features, device_labels, mode, params)

              for k, v in device_predictions.iteritems():
                predictions[k].append(v)

              if device_loss is not None:
                losses.append(device_loss)

              # NOTE(review): gradients are computed even when device_loss
              # is None; only the append above is guarded - confirm.
              device_grads = opt.compute_gradients(device_loss)
              grads.append(device_grads)

      grads = ops.average_gradients(grads)
      train_op = opt.apply_gradients(grads, global_step=global_step)

      # Stitch the per-tower predictions back together along axis 0.
      for k, v in predictions.iteritems():
        predictions[k] = tf.concat(v, axis=0)

      # The reported loss is the sum of the tower losses.
      loss = tf.add_n(losses) if losses else None
    else:
      with tf.device(tf.DeviceSpec(device_type='GPU', device_index=0)):
        predictions, loss = model_fn(features, labels, mode, params)

        train_op = None
        if mode == learn.ModeKeys.TRAIN:
          opt = ops.create_optimizer(
            params.optimizer, params.learning_rate, params.decay_steps)
          train_op = opt.minimize(loss, global_step=global_step)

    # NOTE(review): loss may be None here (see the multi-GPU branch).
    tf.summary.scalar('loss/loss', loss)

    return tf.contrib.learn.ModelFnOps(
      mode=mode,
      predictions=predictions,
      loss=loss,
      train_op=train_op)
Exemple #11
0
  def testAnalysisAndAllocations(self):
    """Trace a three-CPU graph and check the step analysis output."""
    trace_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    metadata = tf.RunMetadata()
    session_config = tf.ConfigProto(device_count={'CPU': 3})

    with tf.Session(config=session_config) as sess:
      with tf.device('/cpu:0'):
        first = tf.constant(1.0, name='const1')
      with tf.device('/cpu:1'):
        second = tf.constant(2.0, name='const2')
      with tf.device('/cpu:2'):
        total = first + second + first * second
      sess.run(total, options=trace_options, run_metadata=metadata)

    self.assertTrue(metadata.HasField('step_stats'))
    analysis = timeline.Timeline(metadata.step_stats).analyze_step_stats()
    self._validateTrace(analysis.chrome_trace.format_to_string())

    maximums = analysis.allocator_maximums
    self.assertTrue('cpu' in maximums)
    cpu_max = maximums['cpu']
    # Lower bound for holding both float32 constants (4 bytes each).
    self.assertGreater(cpu_max.num_bytes, 8)
    self.assertGreater(cpu_max.timestamp, 0)
    for const_name in ('const1', 'const2'):
      self.assertTrue(const_name in cpu_max.tensors)
    def test_single_output(self):
        """Compare an element-wise add operator against a NumPy reference.

        The operator output is squared on the CPU and, when
        ``cuda_enabled`` is true, on the GPU as well.
        """
        print('*** Running Test: ' + self.__class__.__name__ + ' function: ' + _getframe().f_code.co_name)

        class AddOp(Operator):
            def op(self, x, y):
                pos = position_in(x.shape)
                out = output_like(x)
                out[pos] = x[pos] + y[pos]
                return out

        lhs = np.random.random(5).astype(np.float32)
        rhs = np.random.random(5).astype(np.float32)
        # Inputs are doubled before the add and the sum is squared, so the
        # expected value is 4 * (lhs + rhs) ** 2.
        total = lhs + rhs
        expected = 4*total*total

        with tf.Session() as sess:
            with tf.device('/cpu:0'):
                cpu_sum = AddOp(lhs*2, rhs*2, clear_cache=True).as_tensorflow()
                cpu_square = tf.square(cpu_sum)
            if not cuda_enabled:
                result = sess.run([cpu_square])
            else:
                with tf.device('/gpu:0'):
                    gpu_sum = AddOp(lhs*2, rhs*2).as_tensorflow()
                    gpu_square = tf.square(gpu_sum)
                result, result_gpu = sess.run([cpu_square, gpu_square])
                assert np.allclose(expected, result_gpu)

        assert np.allclose(expected, result)
def _write_feature_batch(output_file, extractor, batch, names, idx, total):
    """Run ``extractor`` on one batch and write a "name;features" line per image."""
    stacked = np.stack(batch)
    with tf.device('/gpu:0'):
        features = sess.run(extractor, feed_dict={images: stacked})
    for name, feature in zip(names, features):
        output_file.write("%s;%s\n" % (name, " ".join(str(x) for x in feature)))
    print("%d/%d" % (idx, total))
    output_file.flush()


def extract_features(ids, path, output_path, extractor, batch_size=64):
    """Extract features for the selected images and write them to a file.

    Files in ``path`` are kept when the integer between the last ``_`` and
    the first following ``.`` of their name is contained in ``ids``.

    Args:
        ids: container of integer image ids to keep.
        path: directory scanned for image files.
        output_path: text file that receives one line per image.
        extractor: fetch passed to ``sess.run``.
        batch_size: number of images evaluated per ``sess.run`` call.
    """
    images_names = dict()
    for filename in listdir(path):
        image_id = int(filename.split('_')[-1].split('.')[0])
        if image_id in ids:
            images_names[image_id] = filename

    total = len(images_names)
    pending_images, pending_ids = [], []
    with open(output_path, 'w') as output_file:
        for idx, (image_id, filename) in enumerate(images_names.items()):
            pending_images.append(load_image(join(path, filename)))
            pending_ids.append(image_id)
            if len(pending_images) == batch_size:
                _write_feature_batch(output_file, extractor, pending_images,
                                     pending_ids, idx, total)
                pending_images, pending_ids = [], []
        # Flush whatever did not fill a complete batch.
        if pending_images:
            _write_feature_batch(output_file, extractor, pending_images,
                                 pending_ids, idx, total)
 def testManyCPUs(self):
   """Trace a graph spread over three CPU devices and validate the traces."""
   trace_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
   metadata = tf.RunMetadata()
   session_config = tf.ConfigProto(device_count={'CPU': 3})
   with tf.Session(config=session_config) as sess:
     with tf.device('/cpu:0'):
       first = tf.constant(1.0, name='const1')
     with tf.device('/cpu:1'):
       second = tf.constant(2.0, name='const2')
     with tf.device('/cpu:2'):
       total = first + second + first * second
     sess.run(total, options=trace_options, run_metadata=metadata)

   self.assertTrue(metadata.HasField('step_stats'))
   step_stats = metadata.step_stats
   devices = [d.device for d in step_stats.dev_stats]
   for cpu_index in range(3):
     self.assertTrue(
         '/job:localhost/replica:0/task:0/cpu:%d' % cpu_index in devices)

   # Generate the trace with each combination of the optional sections.
   option_sets = (
       {},
       {'show_dataflow': False},
       {'show_memory': False},
       {'show_memory': False, 'show_dataflow': False},
   )
   for trace_kwargs in option_sets:
     tl = timeline.Timeline(step_stats)
     self._validateTrace(tl.generate_chrome_trace_format(**trace_kwargs))
Exemple #15
0
def pack_range(key, packing, grad_vars, rng):
    """Concatenate a consecutive range of gradients into one flat tensor.

    Args:
        key: Key under which the restore metadata is stored in ``packing``.
        packing: Dict that receives a ``GradPackTuple`` describing the range.
        grad_vars: List of (grad, var) pairs for one tower.
        rng: Pair of integers giving the first and last (inclusive) indices
            of the range to pack.

    Returns:
        A tensor holding the flattened gradients of the range, concatenated
        along axis 0.
    """
    first, last = rng[0], rng[1]
    selected = grad_vars[first:last + 1]
    flat_grads = []
    packed_vars = []
    original_shapes = []
    with tf.name_scope('pack'):
        for grad, var in selected:
            packed_vars.append(var)
            original_shapes.append(grad.shape)
            # Flatten each gradient on the device it already lives on.
            with tf.device(grad.device):
                flat_grads.append(tf.reshape(grad, [-1]))
        packing[key] = GradPackTuple(
            indices=range(first, last + 1),
            vars=packed_vars,
            shapes=original_shapes)
        # The concatenation is placed with the first flattened gradient.
        with tf.device(flat_grads[0].device):
            return tf.concat(flat_grads, 0)
Exemple #16
0
  def testClearDevices(self):
    """Importing a meta graph with clear_devices=True drops all devices."""
    source_graph = tf.Graph()
    with source_graph.as_default():
      with tf.device("/device:CPU:0"):
        a = tf.Variable(tf.constant(1.0, shape=[2, 2]), name="a")
      with tf.device("/job:ps/replica:0/task:0/gpu:0"):
        b = tf.Variable(tf.constant(2.0, shape=[2, 2]), name="b")
      with tf.device("/job:localhost/replica:0/task:0/cpu:0"):
        tf.matmul(a, b, name="matmul")

    # Device string each element is expected to report in the source graph.
    expected_devices = [
        ("a", "/device:CPU:0"),
        ("b", "/job:ps/replica:0/task:0/device:GPU:0"),
        ("matmul", "/job:localhost/replica:0/task:0/device:CPU:0"),
    ]
    for element_name, device in expected_devices:
      self.assertEqual(
          device, str(source_graph.as_graph_element(element_name).device))

    exported, _ = meta_graph.export_scoped_meta_graph(graph=source_graph)

    imported_graph = tf.Graph()
    with imported_graph.as_default():
      meta_graph.import_scoped_meta_graph(exported, clear_devices=True)

    for element_name, _ in expected_devices:
      self.assertEqual(
          "", str(imported_graph.as_graph_element(element_name).device))
    def _build_word_embeddings(self):
        """Create the token-id placeholder(s) and the embedding lookup(s).

        Sets ``self.token_ids``, ``self.embedding_weights`` and
        ``self.embedding``; for a bidirectional model it also sets
        ``self.token_ids_reverse`` and ``self.embedding_reverse``, which
        share the same embedding matrix.
        """
        options = self.options
        n_tokens_vocab = options['n_tokens_vocab']
        ids_shape = (options['batch_size'], options['unroll_steps'])
        projection_dim = options['lstm']['projection_dim']

        # Forward-direction input ids.
        self.token_ids = tf.placeholder(
            DTYPE_INT, shape=ids_shape, name='token_ids')

        # The embedding matrix and its lookup are placed on the CPU.
        with tf.device("/cpu:0"):
            self.embedding_weights = tf.get_variable(
                "embedding", [n_tokens_vocab, projection_dim], dtype=DTYPE)
            self.embedding = tf.nn.embedding_lookup(
                self.embedding_weights, self.token_ids)

        if not self.bidirectional:
            return

        # Reverse-direction ids are looked up in the same matrix.
        self.token_ids_reverse = tf.placeholder(
            DTYPE_INT, shape=ids_shape, name='token_ids_reverse')
        with tf.device("/cpu:0"):
            self.embedding_reverse = tf.nn.embedding_lookup(
                self.embedding_weights, self.token_ids_reverse)
def all_avg_gradients(tower_gradvars, devices, param_server_device='/gpu:0',
                      usenccl=True):
    """Average per-tower gradients and hand every device its own copy.

    Args:
        tower_gradvars: one sequence of (grad, var) pairs per device.
        devices: device strings, in the same order as ``tower_gradvars``.
        param_server_device: device used for the averaging when nccl is
            not used.
        usenccl: use ``nccl.all_sum`` when ``have_nccl`` is also true.

    Returns:
        ``tower_gradvars`` itself when there is a single device; otherwise
        a list with one tuple of (averaged grad, var) pairs per device.
    """
    num_devices = len(devices)
    if num_devices == 1:
        return tower_gradvars

    per_layer = []
    for layer in zip(*tower_gradvars):
        layer_grads, layer_vars = zip(*layer)
        if not (have_nccl and usenccl):
            # Average once on the parameter server device and share the
            # same tensor with every device.
            with tf.device(param_server_device):
                mean_grad = tf.reduce_mean(tf.stack(layer_grads), 0)
            averaged = [mean_grad] * num_devices
        else:
            # Original author's note: these nccl ops must be run on all
            # devices, else deadlock.
            averaged = nccl.all_sum(layer_grads)
            for d, device in enumerate(devices):
                with tf.device(device):
                    averaged[d] *= 1. / num_devices
        per_layer.append(zip(averaged, layer_vars))

    # Transpose from per-layer back to per-device.
    return list(zip(*per_layer))
    def get_updates(self, loss, params):
        """Build per-device update ops that apply device-averaged gradients.

        Args:
            loss: loss tensor passed to ``self.optimizer.compute_gradients``.
            params: variables to differentiate with respect to.

        Returns:
            ``self.updates``: the iteration-counter increment followed by
            one ``apply_gradients`` op per device in ``self._gdev_list``.
        """
        gdev_list = self._gdev_list
        global_scope = tf.get_variable_scope()

        tower_gradvars = []
        for idev, device in enumerate(gdev_list):
            with tf.device(device), \
                    tf.variable_scope(global_scope, reuse=idev > 0), \
                    tf.name_scope('tower_%i' % idev):
                # Assumes self.optimizer follows the tf.train.Optimizer API,
                # whose compute_gradients already returns (grad, var) pairs.
                # That is the layout all_avg_gradients expects, so the
                # result must not be zipped with params again.
                gradvars = self.optimizer.compute_gradients(loss, params)
            tower_gradvars.append(gradvars)

        tower_gradvars = all_avg_gradients(tower_gradvars,
                                           gdev_list,
                                           usenccl=False)

        self.updates = [K.update_add(self.iterations, 1)]

        for device_num, device in enumerate(gdev_list):
            with tf.device(device):
                # Apply this device's averaged gradients rather than the
                # raw gradients of whichever tower was built last.
                gradvars = tower_gradvars[device_num]
                opt_update = self.optimizer.apply_gradients(
                    gradvars, global_step=self.iterations)
            self.updates.append(opt_update)

        return self.updates
    def test_single_output(self):
        """Compare an element-wise add operator against a NumPy reference.

        The operator output is squared on the CPU and, when
        ``cuda_enabled`` is true, on the GPU as well.
        """

        @operator()
        def add(x, y):
            pos = position_in(x.shape)
            out = output_like(x)
            out[pos] = x[pos] + y[pos]
            return out

        lhs = np.random.random(5).astype(np.float32)
        rhs = np.random.random(5).astype(np.float32)
        # Inputs are doubled before the add and the sum is squared, so the
        # expected value is 4 * (lhs + rhs) ** 2.
        total = lhs + rhs
        expected = 4*total*total

        session_config = tf.ConfigProto(allow_soft_placement=False)
        # Optimizations are switched off so that gpu ops are not
        # inadvertently run on the cpu during the test.
        session_config.graph_options.optimizer_options.opt_level = -1
        with tf.Session(config=session_config) as sess:
            with tf.device('/cpu:0'):
                cpu_sum = as_tensorflow(add(lhs*2, rhs*2))
                cpu_square = tf.square(cpu_sum)
            if not cuda_enabled:
                result = sess.run([cpu_square])
            else:
                with tf.device('/gpu:0'):
                    gpu_sum = as_tensorflow(add(lhs*2, rhs*2))
                    gpu_square = tf.square(gpu_sum)
                result, result_gpu = sess.run([cpu_square, gpu_square])
                assert np.allclose(expected, result_gpu)

        assert np.allclose(expected, result)
Exemple #21
0
def computeLoss(predicted,labels,weights,withAverage=False):
	"""Build the weighted cross-entropy loss and its summaries.

	Args:
		predicted: logits tensor.
		labels: class labels; cast to int64 before use.
		weights: per-class weights multiplied into the logits.
		withAverage: when true, also track an exponential moving average.

	Returns:
		The moving-average update op when ``withAverage`` is true,
		otherwise the total loss tensor.
	"""
	int_labels = tf.cast(labels, tf.int64)

	# Scale the logits by the class weights before the softmax.
	scaled_logits = tf.mul(predicted,weights)

	per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
		scaled_logits, int_labels, name='cross_entropy_per_example')

	# Mean over the mini batch, reported through a CPU-placed summary.
	batch_mean = tf.reduce_mean(per_example, name='cross_entropy')
	with tf.device("/cpu:0"):
		tf.scalar_summary('cross_entropy', batch_mean)

	# The total loss is the sum of everything in the 'losses' collection,
	# including the mean added here.
	tf.add_to_collection('losses',batch_mean)
	collected = tf.get_collection('losses')
	total = tf.add_n(collected, name='total_loss')
	with tf.device("/cpu:0"):
		tf.scalar_summary('loss', total)

	if not withAverage:
		return total

	# Track moving averages of every collected loss and of the total.
	averager = tf.train.ExponentialMovingAverage(0.9, name='avg')
	update_op = averager.apply(collected+[total])
	tf.scalar_summary('cross_entropy_running_average', averager.average(total))
	return update_op
Exemple #22
0
def main(unused_argv):
  """Build the baseline model graph and run slim training on it."""
  tf.logging.set_verbosity(FLAGS.log)

  if not tf.gfile.Exists(FLAGS.logdir):
    tf.gfile.MakeDirs(FLAGS.logdir)

  with tf.Graph().as_default():
    model = utils.get_module("baseline.models.%s" % FLAGS.model)
    hparams = model.get_hparams(FLAGS.config)

    # The reader is pinned to a CPU: the worker's CPU when parameter
    # servers are in use, the local one otherwise.
    if FLAGS.ps_tasks:
      reader_device = "/job:worker/cpu:0"
    else:
      reader_device = "/job:localhost/replica:0/task:0/cpu:0"

    with tf.device(reader_device):
      with tf.name_scope("Reader"):
        dataset = reader.NSynthDataset(FLAGS.train_path, is_training=True)
        batch = dataset.get_baseline_batch(hparams)

    # Device placement for the model is delegated to the replica device
    # setter, configured with FLAGS.ps_tasks.
    device_setter = tf.train.replica_device_setter(ps_tasks=FLAGS.ps_tasks)
    with tf.device(device_setter):
      train_op = model.train_op(batch, hparams, FLAGS.config)

      # Run training
      slim.learning.train(
          train_op=train_op,
          logdir=FLAGS.logdir,
          master=FLAGS.master,
          is_chief=FLAGS.task == 0,
          number_of_steps=hparams.max_steps,
          save_summaries_secs=FLAGS.save_summaries_secs,
          save_interval_secs=FLAGS.save_interval_secs)
  def __call__(self, getter, name, *args, **kwargs):
    """Custom getter that routes variable reads through a staging area.

    Args:
      getter: underlying variable getter.
      name: variable name.
      *args: forwarded to ``getter``.
      **kwargs: forwarded to ``getter``; must contain 'shape', 'dtype' and
        'trainable'.

    Returns:
      The staged ``get`` tensor for trainable variables (and for any name
      already staged on this device); otherwise a ``StagedModelVariable``
      wrapping the real variable.
    """
    staging_ops = self.variable_mgr.staging_vars_on_devices[self.device_num]
    if name in staging_ops:
      # Already staged on this device: hand back the stored get op.
      return staging_ops[name][1]

    real_var = getter(name, *args, **kwargs)
    shape, dtype, trainable = (
        kwargs['shape'], kwargs['dtype'], kwargs['trainable'])

    if not self.cpu_device:
      var_to_stage = tf.identity(real_var)  # de-reference the variable.
    else:
      # Keep one identity copy per variable on the CPU device and reuse it
      # for later requests of the same name.
      cpu_cache = self.variable_mgr.staged_vars_on_cpu
      with tf.device(self.cpu_device):
        if name not in cpu_cache:
          cpu_cache[name] = tf.identity(real_var)
        var_to_stage = cpu_cache[name]

    with tf.device(self.devices[self.device_num]):
      staging_area = data_flow_ops.StagingArea([dtype], shapes=[shape])
      put_op = staging_area.put([var_to_stage])
      get_op = staging_area.get()[0]
      staging_ops[name] = (put_op, get_op)

    if trainable:
      # Trainable variables are managed separately through apply_gradients.
      return get_op
    # Other shadow variables are accessed through a wrapper class.
    return StagedModelVariable(real_var, get_op, self.variable_mgr)
def create_weight_variables(shape, seed, name, use_gpu=False):
    """
    Create a weight variable initialised from a truncated normal.

    The standard deviation is ``sqrt(3 / in_out)``. For a 4-element shape
    ``in_out`` is ``shape[0] * shape[1] * shape[2] + shape[3]``; otherwise
    it is ``shape[0] + shape[1]``. The original author labelled this a
    Xavier (Gaussian) initializer.

    **Parameters**

      shape: Shape of the layer (needs at least two entries)

      seed: Seed passed to ``tf.truncated_normal``

      name: Name of the variable passed to ``tf.get_variable``

      use_gpu: Create the variable under "/gpu" instead of "/cpu"
    """
    import math

    if len(shape) == 4:
        in_out = shape[0] * shape[1] * shape[2] + shape[3]
    else:
        in_out = shape[0] + shape[1]

    stddev = math.sqrt(3.0 / in_out)
    initializer = tf.truncated_normal(shape, stddev=stddev, seed=seed)

    # Only the device differs between the GPU and CPU variants.
    device = "/gpu" if use_gpu else "/cpu"
    with tf.device(device):
        return tf.get_variable(name, initializer=initializer, dtype=tf.float32)
def all_sync_params(tower_params, devices, usenccl=True):
    """Build an op that copies the first tower's params to the towers.

    Args:
        tower_params: one sequence of parameters per device, in the same
            order as ``devices``.
        devices: device strings.
        usenccl: broadcast with nccl when ``have_nccl`` is also true.

    Returns:
        ``tf.no_op()`` for a single device, otherwise a grouped op of all
        the assignments (plus the nccl send ops on the nccl path).
    """
    if len(devices) == 1:
        return tf.no_op()

    ops = []
    if not (have_nccl and usenccl):
        # Plain assignment path: every tower, the first included, assigns
        # from the first tower's values.
        source_params = tower_params[0]
        for device, params in zip(devices, tower_params):
            with tf.device(device):
                ops.extend([param.assign(source.read_value())
                            for param, source in zip(params, source_params)])
    else:
        # replicas is (paramX_dev0, paramX_dev1, ...) for one parameter.
        for replicas in zip(*tower_params):
            send_op, received_tensors = nccl.broadcast(replicas[0],
                                                       devices[1:])
            ops.append(send_op)
            receivers = zip(devices[1:], replicas[1:], received_tensors)
            for device, param, received in receivers:
                with tf.device(device):
                    ops.append(param.assign(received))

    return tf.group(*ops)
Exemple #26
0
def all_reduce_gradients(tower_grads, devices):
    """Average tower gradients with a split / reduce / concat scheme.

    Each device splits its gradient into ``FLAGS.num_workers`` pieces along
    axis 0, device ``i`` sums and averages piece ``i`` from every tower,
    and the averaged pieces are concatenated back together.

    Args:
        tower_grads: one list of (grad, var) pairs per tower.
        devices: device strings, one per tower.

    Returns:
        A list of (averaged grad, var) pairs, using the first tower's
        variable for each entry.
    """
    averaged = []
    # Each entry looks like ((grad0_gpu0, var0_gpu0), ..., (grad0_gpuN, var0_gpuN)).
    for grad_and_vars in zip(*tower_grads):
        num_workers = FLAGS.num_workers
        assert len(grad_and_vars) == num_workers

        # Every device splits its own gradient.
        pieces = []
        for i, (g, _) in enumerate(grad_and_vars):
            with tf.device(devices[i]):
                pieces.append(tf.split(0, num_workers, g))

        # Device i gathers piece i from all towers and averages it.
        reduced = []
        for i, dev in enumerate(devices):
            with tf.device(dev):
                x = pieces[i][i]
                for j in range(num_workers):
                    if j != i:
                        x += pieces[j][i]
                reduced.append(x / num_workers)

        # The variables are shared across towers, so the first tower's
        # reference is the one returned.
        averaged.append((tf.concat(0, reduced), grad_and_vars[0][1]))
    return averaged
Exemple #27
0
def train():
    """Train and evaluate a ``TradingSystemsModel`` on the trading data.

    Loads ``data/trading-systems.npz``, uses the first 90000 samples for
    training and the remainder for validation, runs ``tb.Trainer.train`` and
    finally evaluates the model on the validation split.
    """
    hyperparams = {'batch_size': 50,
                   'learning_rate': 0.0001,
                   'grad_decay': 0.95,
                   'grad_epsilon': 0.01,
                   'num_updates': 20000,
                   'grad_norm_clip': 5}
    with tf.device('/cpu:0'):
        model = TradingSystemsModel(hyperparams)
    loss = tb.Crossentropy(hyperparams)
    acc = tb.CatAcc(hyperparams)
    evaluator = tb.Evaluator(hyperparams, loss, acc)
    optim = tb.RMSPropOptim(hyperparams)
    trainer = tb.Trainer(model, hyperparams, loss, optim, evaluator)

    split = 90000
    # An .npz archive is read lazily: every ``data[key]`` access re-reads the
    # member from disk and the file handle stays open until closed. Read each
    # array exactly once and close the archive right away.
    with np.load('data/trading-systems.npz') as data:
        ticks = data['ticks']
        targets = data['targets']
    print(ticks.shape)

    train_xs = {'ticks': ticks[:split]}
    train_y = targets[:split]

    val_xs = {'ticks': ticks[split:]}
    val_y = targets[split:]

    with tf.device('/cpu:0'):
        trainer.train(train_xs, train_y,
                      val_xs, val_y,
                      val_cmp=True)
    evaluator.eval(model, val_xs, val_y)
Exemple #28
0
def prepare_networks(gpu, image_batch, nb_cl, nb_groups):
    """Build a ResNet18 under 'ResNet18' and a second one under 'store_ResNet18'.

    Both networks receive ``image_batch`` minus a per-channel mean constant
    and produce ``nb_cl * nb_groups`` outputs. The first is built with
    ``phase='train'``, the second with ``phase='test'``.

    Args:
        gpu: GPU index as a string; it is appended to ``'/gpu:'``.
        image_batch: Input image tensor (presumably 4-D with 3 channels last,
            given the [1, 1, 1, 3] mean constant -- TODO confirm).
        nb_cl: Multiplied with ``nb_groups`` to get the number of outputs.
        nb_groups: See ``nb_cl``.

    Returns:
        ``(variables_graph, variables_graph2, scores, scores_stored)``: the
        ``tf.GraphKeys.WEIGHTS`` collections of the two scopes, followed by
        single-element lists holding each network's output.
    """
    mean_img = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32,
                           shape=[1, 1, 1, 3], name='img_mean')
    device_name = '/gpu:' + gpu

    # Network built in training phase.
    with tf.variable_scope('ResNet18'):
        with tf.device(device_name):
            train_score = utils_resnet.ResNet18(
                image_batch - mean_img,
                phase='train',
                num_outputs=nb_cl * nb_groups)
        scores = [train_score]
        tf.get_variable_scope().reuse_variables()

    variables_graph = tf.get_collection(tf.GraphKeys.WEIGHTS,
                                        scope='ResNet18')

    # Second network, built in test phase under its own scope.
    with tf.variable_scope('store_ResNet18'):
        with tf.device(device_name):
            stored_score = utils_resnet.ResNet18(
                image_batch - mean_img,
                phase='test',
                num_outputs=nb_cl * nb_groups)
        scores_stored = [stored_score]
        tf.get_variable_scope().reuse_variables()

    variables_graph2 = tf.get_collection(tf.GraphKeys.WEIGHTS,
                                         scope='store_ResNet18')

    return variables_graph, variables_graph2, scores, scores_stored
Exemple #29
0
  def _add_shared_train_op(self):
    """Builds the training op and stores it in self._shared_train_op.

    Selects the loss from the hyper-parameters (RL/AC vs. pointer-generator,
    with or without coverage), takes its gradients w.r.t. all trainable
    variables, clips them by global norm, records the norm as a scalar
    summary and applies the clipped gradients with Adagrad.
    """
    hps = self._hps

    # Choose the loss: the base loss first, overridden by the coverage
    # variant when coverage is enabled.
    use_rl_loss = hps.rl_training or hps.ac_training
    if use_rl_loss:
      loss_to_minimize = self._reinforce_shared_loss
    else:
      loss_to_minimize = self._pgen_loss
    if hps.coverage:
      if use_rl_loss:
        loss_to_minimize = self._reinforce_cov_total_loss
      else:
        loss_to_minimize = self._pointer_cov_total_loss

    tvars = tf.trainable_variables()
    gradients = tf.gradients(
        loss_to_minimize,
        tvars,
        aggregation_method=tf.AggregationMethod.EXPERIMENTAL_TREE)

    gpu_device = "/gpu:{}".format(hps.gpu_num)

    # Gradient clipping is placed on the configured GPU.
    with tf.device(gpu_device):
      grads, global_norm = tf.clip_by_global_norm(gradients, hps.max_grad_norm)

    tf.summary.scalar('global_norm', global_norm)

    # Adagrad update, also placed on the configured GPU.
    optimizer = tf.train.AdagradOptimizer(
        hps.lr, initial_accumulator_value=hps.adagrad_init_acc)
    with tf.device(gpu_device):
      self._shared_train_op = optimizer.apply_gradients(
          zip(grads, tvars), global_step=self.global_step, name='train_step')
    def build_generator(self):
        """Build the scoring graph and return the score with its placeholders.

        Runs the language, context and local LSTMs for ``MAX_QUERY_WORDS``
        steps. At every step the word prediction is the sum of the projected
        context and local states; the prediction value at the ground-truth
        query word is masked with ``query_mask``, passed through a softmax
        over the batch and added to the running score. The embedding of the
        arg-max predicted word is fed back as the next language-LSTM input.

        Indentation uses spaces only: the previous mix of tabs and spaces is
        rejected by Python 3 with ``TabError``.

        Returns:
            Tuple ``(score, image, local_image, query, query_mask, bbox)``
            where everything after ``score`` is a placeholder to be fed.
        """
        # placeholder is for feeding data
        image = tf.placeholder(tf.float32, [self.batch_size, self.dim_image])  # (batch_size, dim_image)
        local_image = tf.placeholder(tf.float32, [self.batch_size, self.dim_image])
        query = tf.placeholder(tf.int32, [self.batch_size, MAX_QUERY_WORDS])
        query_mask = tf.placeholder(tf.float32, [self.batch_size, MAX_QUERY_WORDS])
        bbox = tf.placeholder(tf.float32, [self.batch_size, self.dim_coordinates])

        # [image] embed image feature to dim_hidden
        image_emb = tf.nn.bias_add(tf.matmul(image, self.embed_image_W), self.embed_image_b)  # (batch_size, dim_hidden)
        local_image_emb = tf.nn.bias_add(tf.matmul(local_image, self.embed_local_W), self.embed_local_b)  # (batch_size, dim_hidden)

        score = tf.zeros([self.batch_size], tf.float32)

        state_lang = tf.zeros([self.batch_size, self.lstm_lang.state_size])
        state_context = tf.zeros([self.batch_size, self.lstm_context.state_size])
        state_local = tf.zeros([self.batch_size, self.lstm_local.state_size])
        query_emb = tf.zeros([self.batch_size, self.dim_hidden])
        for j in range(MAX_QUERY_WORDS):
            # language lstm
            with tf.variable_scope("lstm_lang"):
                output_lang, state_lang = self.lstm_lang(query_emb, state_lang)
            lang = tf.slice(state_lang, [0, 0], [self.batch_size, self.dim_hidden])

            # context lstm
            with tf.variable_scope("lstm_context"):
                output_context, state_context = self.lstm_context(tf.concat(1, [image_emb, lang]), state_context)
            context = tf.slice(state_context, [0, 0], [self.batch_size, self.dim_hidden])

            # local lstm
            with tf.variable_scope("lstm_local"):
                output_local, state_local = self.lstm_local(tf.concat(1, [local_image_emb, lang, bbox]), state_local)
            local = tf.slice(state_local, [0, 0], [self.batch_size, self.dim_hidden])

            context_emb = tf.nn.xw_plus_b(context, self.W_context, self.B_context)
            local_emb = tf.nn.xw_plus_b(local, self.W_local, self.B_local)
            word_pred = tf.add(context_emb, local_emb)

            max_prob_index = tf.argmax(word_pred, 1)  # b

            # One-hot encode the ground-truth word of step j.
            labels = tf.expand_dims(query[:, j], 1)
            indices = tf.expand_dims(tf.range(0, self.batch_size, 1), 1)
            concated = tf.concat(1, [indices, labels])
            with tf.device('/cpu:0'):
                onehot_labels = tf.sparse_to_dense(concated, tf.pack([self.batch_size, self.dict_words]), 1.0, 0.0)

            # Prediction value at the ground-truth word, masked, then
            # normalised with a softmax across the batch.
            current_score = tf.mul(onehot_labels, word_pred)
            current_score = tf.reduce_sum(current_score, 1)
            current_score = tf.mul(current_score, query_mask[:, j])
            current_score = tf.reshape(current_score, [1, self.batch_size])
            current_score = tf.nn.softmax(current_score)
            score = tf.add(score, current_score)

            with tf.device("/cpu:0"):
                # Later steps reuse the variables created in this step.
                tf.get_variable_scope().reuse_variables()
                query_emb = tf.nn.embedding_lookup(self.query_emb_W, max_prob_index)

        return score, image, local_image, query, query_mask, bbox
Exemple #31
0
    def __init__(
            self, sequence_length, num_classes, vocab_size,
            embedding_size, filter_sizes, num_filters, l2_reg_lambda=0.0):
        """Build the graph of a convolutional text classifier.

        The graph consists of an embedding lookup, one convolution + max-pool
        branch per entry of ``filter_sizes``, dropout, and a linear output
        layer. It exposes ``scores``, ``predictions``, ``loss`` and
        ``accuracy`` as attributes.

        Args:
            sequence_length: Number of token ids per input row.
            num_classes: Width of the label vector and of the output layer.
            vocab_size: Number of rows in the embedding matrix.
            embedding_size: Width of each embedding vector.
            filter_sizes: Iterable of convolution filter heights.
            num_filters: Number of filters per filter size.
            l2_reg_lambda: Weight of the L2 penalty on the output layer.
        """
        # Inputs: token ids, targets, and the dropout keep probability.
        self.input_x = tf.placeholder(
            tf.int32, [None, sequence_length], name="input_x")
        self.input_y = tf.placeholder(
            tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(
            tf.float32, name="dropout_keep_prob")

        # Running total of the (optional) L2 penalty.
        l2_penalty = tf.constant(0.0)

        # Embedding layer, pinned to the CPU. The matrix "W" holds the
        # embedding vector of every word, initialised uniformly at random.
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            embedding_matrix = tf.Variable(
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                name="W")
            self.embedded_chars = tf.nn.embedding_lookup(
                embedding_matrix, self.input_x)
            # Add a trailing axis so conv2d sees a 4-D input.
            self.embedded_chars_expanded = tf.expand_dims(
                self.embedded_chars, -1)

        # One convolution + max-pool branch per filter size.
        pooled_outputs = []
        for filter_size in filter_sizes:
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                kernel = tf.Variable(
                    tf.truncated_normal(
                        [filter_size, embedding_size, 1, num_filters],
                        stddev=0.1),
                    name="W")
                bias = tf.Variable(
                    tf.constant(0.1, shape=[num_filters]), name="b")
                conv = tf.nn.conv2d(
                    self.embedded_chars_expanded,
                    kernel,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                activated = tf.nn.relu(
                    tf.nn.bias_add(conv, bias), name="relu")
                # The pooling window spans every position the VALID
                # convolution produced.
                pooled_outputs.append(
                    tf.nn.max_pool(
                        activated,
                        ksize=[1, sequence_length - filter_size + 1, 1, 1],
                        strides=[1, 1, 1, 1],
                        padding='VALID',
                        name="pool"))

        # Concatenate the branches and flatten to one row per example.
        num_filters_total = num_filters * len(filter_sizes)
        self.h_pool = tf.concat(3, pooled_outputs)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(
                self.h_pool_flat, self.dropout_keep_prob)

        # Linear output layer: unnormalised scores and arg-max predictions.
        with tf.name_scope("output"):
            out_weights = tf.get_variable(
                "W",
                shape=[num_filters_total, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            out_bias = tf.Variable(
                tf.constant(0.1, shape=[num_classes]), name="b")
            l2_penalty += tf.nn.l2_loss(out_weights)
            l2_penalty += tf.nn.l2_loss(out_bias)
            # xw_plus_b computes matmul(x, W) + b.
            self.scores = tf.nn.xw_plus_b(
                self.h_drop, out_weights, out_bias, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Mean cross-entropy plus the weighted L2 penalty.
        with tf.name_scope("loss"):
            per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.scores, labels=self.input_y)
            self.loss = (tf.reduce_mean(per_example_loss)
                         + l2_reg_lambda * l2_penalty)

        # Fraction of examples whose prediction matches the target arg-max.
        with tf.name_scope("accuracy"):
            is_correct = tf.equal(
                self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(
                tf.cast(is_correct, "float"), name="accuracy")
Exemple #32
0
    def __init__(self, is_trainning, embeddings, config):
        """Assemble the sentence-pair classification graph.

        The graph embeds both texts, encodes them with ``biLSTMBlock``,
        soft-aligns them with attention, composes the enhanced
        representations with a second ``biLSTMBlock``, pools the result and
        classifies it with two dense layers. When ``is_trainning`` is true an
        Adam training op with global-norm gradient clipping is added too.

        Args:
            is_trainning: Whether to create ``self.train_op``.
            embeddings: Initial value of the non-trainable embedding matrix.
            config: Object providing ``hidden_num``, ``max_length``,
                ``class_num``, ``l2_lambda`` and ``learning_rate``.
        """
        self.is_trainning = is_trainning
        hidden_num = config.hidden_num
        self.hidden_num = hidden_num
        seq_length = config.max_length
        self.seq_length = seq_length
        class_num = config.class_num
        self.class_num = class_num

        # Placeholders: the two token-id sequences and the label vector.
        self.text_a = tf.placeholder(
            tf.int32, [None, seq_length], name='text_a')
        self.text_b = tf.placeholder(
            tf.int32, [None, seq_length], name='text_b')
        self.y = tf.placeholder(tf.int32, [None, class_num], name='y')
        # Real (unpadded) lengths of both sequences.
        self.a_length = tf.placeholder(tf.int32, [None], name='a_length')
        self.b_length = tf.placeholder(tf.int32, [None], name='b_length')
        self.dropout_keep_prob = tf.placeholder(
            tf.float32, name="dropout_keep_prob")

        # Embedding lookup on the CPU with a frozen embedding matrix.
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            self.vocab_matrix = tf.Variable(embeddings, trainable=False)
            self.text_a_embed = tf.nn.embedding_lookup(
                self.vocab_matrix, self.text_a)
            self.text_b_embed = tf.nn.embedding_lookup(
                self.vocab_matrix, self.text_b)

        # Input encoding; the second call passes isreuse=True for the same
        # scope name.
        with tf.name_scope('Input_Encoding'):
            a_bar = self.biLSTMBlock(
                self.text_a_embed, hidden_num,
                'Input_Encoding/biLSTM', self.a_length)
            b_bar = self.biLSTMBlock(
                self.text_b_embed, hidden_num,
                'Input_Encoding/biLSTM', self.b_length,
                isreuse=True)

        # Local inference modelling.
        with tf.name_scope('Local_inference_Modeling'):
            # Similarity between every word of a_bar and every word of b_bar.
            with tf.name_scope('word_similarity'):
                similarity = tf.matmul(
                    a_bar, tf.transpose(b_bar, [0, 2, 1]))
                attn_for_a = tf.nn.softmax(similarity)
                attn_for_b = tf.nn.softmax(
                    tf.transpose(similarity, [0, 2, 1]))
                a_hat = tf.matmul(attn_for_a, b_bar)
                b_hat = tf.matmul(attn_for_b, a_bar)

            # Compute m_a and m_b:
            #   m_a = [a_bar, a_hat, a_bar - a_hat, a_bar * a_hat]   (14)
            #   m_b = [b_bar, b_hat, b_bar - b_hat, b_bar * b_hat]   (15)
            with tf.name_scope("compute_m_a/m_b"):
                a_diff = tf.subtract(a_bar, a_hat)
                a_prod = tf.multiply(a_bar, a_hat)
                b_diff = tf.subtract(b_bar, b_hat)
                b_prod = tf.multiply(b_bar, b_hat)
                self.m_a = tf.concat([a_bar, a_hat, a_diff, a_prod], axis=2)
                self.m_b = tf.concat([b_bar, b_hat, b_diff, b_prod], axis=2)

        with tf.name_scope("Inference_Composition"):
            v_a = self.biLSTMBlock(
                self.m_a, hidden_num,
                'Inference_Composition/biLSTM', self.a_length)
            v_b = self.biLSTMBlock(
                self.m_b, hidden_num,
                'Inference_Composition/biLSTM', self.b_length,
                isreuse=True)

            # Average pooling and max pooling over axis 1.
            a_avg = tf.reduce_mean(v_a, axis=1)
            b_avg = tf.reduce_mean(v_b, axis=1)
            a_max = tf.reduce_max(v_a, axis=1)
            b_max = tf.reduce_max(v_b, axis=1)
            pooled = tf.concat([a_avg, a_max, b_avg, b_max], axis=1)

        with tf.name_scope("output"):
            dense_init = tf.random_normal_initializer(0.0, 0.1)
            with tf.variable_scope('feed_foward_layer1'):
                hidden = tf.layers.dense(
                    tf.nn.dropout(pooled, self.dropout_keep_prob),
                    hidden_num,
                    tf.nn.relu,
                    kernel_initializer=dense_init)
            with tf.variable_scope('feed_foward_layer2'):
                self.logits = tf.layers.dense(
                    tf.nn.dropout(hidden, self.dropout_keep_prob),
                    class_num,
                    tf.nn.tanh,
                    kernel_initializer=dense_init)

            self.score = tf.nn.softmax(self.logits, name='score')
            self.prediction = tf.argmax(self.score, 1, name="prediction")

        with tf.name_scope('cost'):
            per_example_cost = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=self.y, logits=self.logits)
            self.cost = tf.reduce_mean(per_example_cost)
            # L2 penalty over trainable variables whose name contains 'w' or
            # 'kernel'.
            penalised = [
                var for var in tf.trainable_variables()
                if ('w' in var.name) or ('kernel' in var.name)
            ]
            l2_loss = tf.add_n(
                [tf.nn.l2_loss(var) for var in penalised]) * config.l2_lambda
            self.loss = l2_loss + self.cost

        self.accuracy = tf.reduce_mean(
            tf.cast(tf.equal(tf.argmax(self.y, axis=1), self.prediction),
                    tf.float32))

        # The optimiser is only needed for the training graph.
        if is_trainning:
            tvars = tf.trainable_variables()
            clipped, _ = tf.clip_by_global_norm(
                tf.gradients(self.loss, tvars), 5)
            optimizer = tf.train.AdamOptimizer(config.learning_rate)
            self.train_op = optimizer.apply_gradients(zip(clipped, tvars))
Exemple #33
0
    def __init__(self, num_users, num_items, num_ratings, embedding_dim,
                 reg_lambda):
        """Build the graph of a pairwise-ranking model over user/item embeddings.

        Each input row belongs to one user and carries that user's item ids,
        ratings and negative item ids. The graph exposes ``bpr_loss``,
        ``reg_loss`` and ``loss`` as attributes.

        Args:
            num_users: Number of users; must be at least 1.
            num_items: Number of items; must be at least 1.
            num_ratings: Number of ratings; must be at least 1.
            embedding_dim: Width of the embeddings; must be at least 1.
            reg_lambda: Non-negative regularisation weight.
        """
        assert num_users >= 1
        self.num_users = num_users
        assert num_items >= 1
        self.num_items = num_items
        assert num_ratings >= 1
        self.num_ratings = num_ratings
        assert embedding_dim >= 1
        self.embedding_dim = embedding_dim
        assert reg_lambda >= 0

        # Input placeholders; the per-user ones have one row per user.
        self.input_user_ids = tf.placeholder(
            tf.int32, [None], name="input_user_ids")
        self.input_per_user_count = tf.placeholder(
            tf.int32, [None], name="input_per_user_count")
        self.input_per_user_item_ids = tf.placeholder(
            tf.int32, [None, None], name="input_per_user_item_ids")
        self.input_per_user_ratings = tf.placeholder(
            tf.float32, [None, None], name="input_per_user_ratings")
        self.input_per_user_neg_ids = tf.placeholder(
            tf.int32, [None, None], name="input_per_user_neg_ids")

        # Number of users in the fed batch (distinct from self.num_users).
        users_in_batch = tf.shape(self.input_user_ids)[0]
        # Not referenced below; kept so the same ops are added to the graph.
        batch_size = tf.reduce_sum(self.input_per_user_count)

        # Every per-user input must have one row per user id.
        row_count_checks = [
            tf.assert_equal(users_in_batch, tf.shape(per_user_input)[0])
            for per_user_input in (self.input_per_user_count,
                                   self.input_per_user_item_ids,
                                   self.input_per_user_ratings,
                                   self.input_per_user_neg_ids)
        ]

        # "pu" is short for "per user".
        pu_mask = tf.sequence_mask(self.input_per_user_count, dtype=tf.float32)

        # Embedding lookups run on the CPU and only after the row checks.
        with tf.device('/cpu:0'), tf.name_scope(
                'embedding_lookup'), tf.control_dependencies(
                    row_count_checks):
            # Give user ids an extra axis so they line up with the
            # per-user inputs.
            expanded_user_ids = tf.expand_dims(self.input_user_ids, 1)
            expanded_user_em = embedding_lookup_layer(
                expanded_user_ids, self.num_users, self.embedding_dim,
                'user_embedding')
            pu_item_em = embedding_lookup_layer(
                self.input_per_user_item_ids, self.num_items,
                self.embedding_dim, 'item_embedding')
            pu_neg_em = embedding_lookup_layer(
                self.input_per_user_neg_ids, self.num_items,
                self.embedding_dim, 'item_embedding', reuse=True)
            pu_item_bias = bias_lookup_layer(
                self.input_per_user_item_ids, self.num_items,
                'item_embedding')
            pu_neg_bias = bias_lookup_layer(
                self.input_per_user_neg_ids, self.num_items,
                'item_embedding', reuse=True)

        with tf.name_scope('bpr'):
            pu_em_delta = pu_item_em - pu_neg_em
            pu_bias_delta = pu_item_bias - pu_neg_bias
            user_item_score = tf.reduce_sum(
                expanded_user_em * pu_em_delta, axis=-1)
            pu_prediction_delta = user_item_score + pu_bias_delta
            # The mask zeroes the entries beyond each user's count.
            log_term = tf.log(
                tf.sigmoid(-pu_prediction_delta) + 0.01)  # TODO: log?
            self.bpr_loss = pu_mask * log_term

        # Regularisation: half of reg_lambda times the summed collection.
        with tf.name_scope('regularization'):
            half_lambda = (reg_lambda) / 2
            self.reg_loss = half_lambda * sum(
                tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

        # Total loss.
        with tf.name_scope('loss'):
            self.loss = tf.reduce_mean(self.bpr_loss) + self.reg_loss
def train(point_cloud_data):
    """Build the model graph and train it for ``MAX_EPOCH`` epochs.

    The model, loss and momentum optimiser are created on ``/gpu:0``. After
    every epoch a checkpoint is written to ``LOG_DIR``. The session and the
    summary writer are closed when training ends or raises, so buffered
    summaries are flushed to disk.

    Args:
        point_cloud_data: Handed to ``train_one_epoch`` through the ``ops``
            dict under the key ``'point_cloud_data'``.
    """
    with tf.Graph().as_default():
        with tf.device('/gpu:' + str(0)):
            # Input placeholders.
            point_clouds_ph, rot_ph = MODEL.placeholder_inputs(32, 1024)

            # Boolean training-mode flag.
            is_training_ph = tf.placeholder(tf.bool, shape=())
            print(is_training_ph)

            # Step counter; it is incremented by optimizer.minimize below.
            batch = tf.Variable(0)
            bn_decay = get_bn_decay(batch)
            tf.summary.scalar('bn decay', bn_decay)
            print(bn_decay)

            # Model and loss.
            pred = MODEL.get_model(point_clouds_ph, is_training_ph, bn_decay)
            loss, mat_diff_sum = MODEL.get_loss(pred, rot_ph)
            tf.summary.scalar("loss", loss)
            print(pred)
            print(loss)

            learning_rate = get_learning_rate(batch)
            tf.summary.scalar('learning rate', learning_rate)

            optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=MOMENTUM)
            train_op = optimizer.minimize(loss, global_step=batch)

        saver = tf.train.Saver()

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        sess = tf.Session(config=config)
        train_writer = None
        try:
            # Summary writer.
            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter(os.path.join(LOG_DIR, "train_test"), sess.graph)

            # Initialise variables.
            init = tf.global_variables_initializer()
            sess.run(init, {is_training_ph: True})

            ops = {'pointclouds_pl': point_clouds_ph,
                   'rot_ph': rot_ph,
                   'is_training_pl': is_training_ph,
                   'pred': pred,
                   'loss': loss,
                   "mat_diff_sum": mat_diff_sum,
                   'train_op': train_op,
                   'merged': merged,
                   'step': batch,
                   'point_cloud_data': point_cloud_data}

            for epoch in range(MAX_EPOCH):
                log_string('-------------- EPOCH %03d ---------------------' % (epoch))
                sys.stdout.flush()

                train_one_epoch(sess, ops, train_writer)

                save_path = saver.save(sess, os.path.join(LOG_DIR, "reg_model"), global_step=epoch)
                log_string("Model saved in file: %s" % save_path)
        finally:
            # Flush pending summaries and release the session even when
            # training is interrupted by an exception.
            if train_writer is not None:
                train_writer.close()
            sess.close()
Exemple #35
0
def train_model(config, environ, train_data, test_data, trainval_data=None):
  """Trains a CNNModelSR model and reports PSNR/SSIM on the test data.

  Args:
      config: Config object.
      environ: Environ object.
      train_data: Dataset object used for training.
      test_data: Dataset object used for evaluation.
      trainval_data: Optional Dataset object for the train-validation
        iterator. Defaults to `train_data`.

  Returns:
      psnr: Mean PSNR over the test data after the last training iteration.
      ssim: Mean SSIM over the same data.
  """
  np.random.seed(0)
  if not hasattr(config, "seed"):
    tf.set_random_seed(1234)
    log.info("Setting tensorflow random seed={:d}".format(1234))
  else:
    log.info("Setting tensorflow random seed={:d}".format(config.seed))
    tf.set_random_seed(config.seed)
  if environ.verbose:
    verbose_level = 0
  else:
    verbose_level = 2

  if trainval_data is None:
    trainval_data = train_data

  log.info("Environment: {}".format(environ.__dict__))
  log.info("Config: {}".format(config.__dict__))

  save_folder = os.path.join(environ.save_folder, environ.exp_id)
  logs_folder = os.path.join(environ.logs_folder, environ.exp_id)
  with log.verbose_level(verbose_level):
    exp_logger = ExperimentLogger(logs_folder)

    if not hasattr(config, "seed"):
      data_seed = 0
    else:
      data_seed = config.seed

    # Gets data iterators.
    train_iter = get_iter(
        train_data,
        batch_size=config.batch_size,
        shuffle=True,
        cycle=True,
        prefetch=config.prefetch,
        seed=data_seed,
        num_worker=25,
        queue_size=500)
    # Uses the train-validation dataset; previously this iterated
    # `train_data`, so the `trainval_data` argument had no effect.
    trainval_iter = get_iter(
        trainval_data,
        batch_size=config.batch_size,
        shuffle=True,
        cycle=True,
        prefetch=config.prefetch,
        num_worker=10,
        queue_size=200)
    test_iter = get_iter(
        test_data,
        batch_size=config.batch_size,
        shuffle=False,
        cycle=False,
        prefetch=config.prefetch,
        num_worker=10,
        queue_size=200)

    # Builds models. The validation model reuses the training variables.
    log.info("Building models")
    with tf.name_scope("Train"):
      with tf.variable_scope("Model", reuse=None):
        with tf.device(environ.device):
          m = CNNModelSR(config, is_training=True)

    with tf.name_scope("Valid"):  # also include testing in this graph
      with tf.variable_scope("Model", reuse=True):
        with tf.device(environ.device):
          mvalid = CNNModelSR(config, is_training=False)

    # Initializes variables.
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
      saver = tf.train.Saver()
      sess.run(tf.global_variables_initializer())

      def train_step():
        """Runs one optimization step and returns its L2 loss."""
        batch = train_iter.next()

        feed_data = {
            m.input:
            np.expand_dims(batch["img"], axis=3),
            m.label:
            np.expand_dims(
                ut.crop_img(batch["label"], config.crop_border), axis=3)
        }

        _, l2_loss, _ = sess.run(
            [m.cost, m.l2_loss, m.train_op], feed_dict=feed_data)
        return l2_loss

      def evaluate(data_iter, nbatches):
        """Runs evaluation and writes input/output images to `save_folder`.

        Args:
            data_iter: Iterator over evaluation batches.
            nbatches: Number of batches to draw, or -1 to exhaust
              `data_iter`.

        Returns:
            PSNR: Mean PSNR over the evaluated batches.
            SSIM: Mean SSIM over the evaluated batches.
        """
        count = 0
        PSNR = 0.0
        SSIM = 0.0
        crop_border = config.crop_border

        if nbatches == -1:
          iter_ = data_iter
        else:
          iter_ = range(nbatches)

        for bb in iter_:
          if nbatches == -1:
            batch = bb
          else:
            batch = data_iter.next()

          # deal with gray images
          is_rgb_img = len(batch["img"].shape) >= 3

          if not is_rgb_img:
            img_y = batch["img"]
            label_y = batch["label"]
          else:
            # note Matlab format is Ycbcr
            img_y = batch["img"][:, :, 0]
            img_cb = batch["img"][:, :, 1]
            img_cr = batch["img"][:, :, 2]
            label_y = batch["label"][:, :, 0]

          label_y = ut.crop_img(label_y, crop_border)
          # Only the first (luma) channel is fed to the network.
          feed_data = {
              mvalid.input:
              np.expand_dims(np.expand_dims(img_y, axis=0), axis=3)
          }

          output_img = sess.run(mvalid.output, feed_dict=feed_data)
          output_img = ut.clip_img(np.squeeze(output_img *
                                              255.0))  # clip pixel value
          PSNR += ut.compute_psnr(output_img, label_y)
          SSIM += ut.compute_ssim(output_img, label_y)

          if not is_rgb_img:
            save_input_img = ut.crop_img(
                ut.post_process(img_y * 255.0), crop_border)
            save_output_img = ut.post_process(output_img)
          else:
            save_input_img = np.zeros_like(batch["img"])
            # note OpenCV format is Ycrcb
            save_input_img[:, :, 0] = ut.clip_img(img_y * 255.0)
            save_input_img[:, :, 1] = img_cr
            save_input_img[:, :, 2] = img_cb

            save_input_img = ut.crop_img(
                ut.post_process(
                    cv2.cvtColor(
                        save_input_img.astype(np.uint8), cv2.COLOR_YCR_CB2BGR)),
                crop_border)

            # The chroma channels are cropped like the network output.
            save_output_img = np.zeros_like(save_input_img)
            save_output_img[:, :, 0] = output_img
            save_output_img[:, :, 1] = img_cr[crop_border:-crop_border,
                                              crop_border:-crop_border]
            save_output_img[:, :, 2] = img_cb[crop_border:-crop_border,
                                              crop_border:-crop_border]

            save_output_img = ut.post_process(
                cv2.cvtColor(
                    save_output_img.astype(np.uint8), cv2.COLOR_YCR_CB2BGR))

          # NOTE(review): `save_folder` is only created by save(); confirm it
          # exists before the first evaluation, otherwise these writes fail.
          cv2.imwrite(
              os.path.join(save_folder,
                           "test_input_{:05d}.png".format(count + 1)),
              save_input_img)

          cv2.imwrite(
              os.path.join(save_folder,
                           "test_output_{:05d}.png".format(count + 1)),
              save_output_img)

          count += 1

        PSNR /= count
        SSIM /= count

        return PSNR, SSIM

      def save():
        """Snapshots a model; writes the config files on first use."""
        if not os.path.isdir(save_folder):
          os.makedirs(save_folder)
          config_file = os.path.join(save_folder, "conf.json")
          environ_file = os.path.join(save_folder, "env.json")
          with open(config_file, "w") as f:
            f.write(config.to_json())
          with open(environ_file, "w") as f:
            f.write(environ.to_json())
        log.info("Saving to {}".format(save_folder))
        saver.save(
            sess,
            os.path.join(save_folder, "model.ckpt"),
            global_step=m.global_step)

      def train():
        """Train loop; returns the final test PSNR and SSIM."""
        lr = config.base_learn_rate
        # Work on a copy: the schedule is consumed with pop(0), and popping
        # from `config.lr_decay_steps` itself would alter the config that
        # save() serializes and that the caller still holds.
        lr_decay_steps = list(config.lr_decay_steps)
        max_train_iter = config.max_train_iter
        m.assign_lr(sess, lr)

        if environ.verbose:
          loop = range(max_train_iter)
        else:
          loop = pb.get(max_train_iter)

        for niter in loop:
          # decrease learning rate
          if lr_decay_steps and (niter + 1) == lr_decay_steps[0]:
            lr *= 0.1
            m.assign_lr(sess, lr)
            lr_decay_steps.pop(0)
          l2_loss = train_step()
          if (niter + 1) % config.disp_iter == 0 or niter == 0:
            exp_logger.log_train_loss(niter, l2_loss)
          if (niter + 1) % config.valid_iter == 0 or niter == 0:
            log.info("Experment ID {}".format(environ.exp_id))
            test_iter.reset()
            psnr, ssim = evaluate(test_iter, -1)
            exp_logger.log_valid_psnr(niter, psnr)
            exp_logger.log_valid_ssim(niter, ssim)
          if (niter + 1) % config.save_iter == 0:
            save()
        test_iter.reset()
        psnr, ssim = evaluate(test_iter, -1)
        return psnr, ssim

      psnr, ssim = train()
  return psnr, ssim
# Hyper-parameters of the second convolution layer.
conv2_fmaps = 64
conv2_ksize = 3
conv2_stride = 1
conv2_pad = "SAME"
conv2_dropout_rate = 0.25

# The pooling layer keeps the number of feature maps of conv2.
pool3_fmaps = conv2_fmaps

# Fully connected layer size and its dropout rate.
n_fc1 = 128
fc1_dropout_rate = 0.5

n_outputs = 10

# reset_graph is defined elsewhere; presumably it clears the default graph
# before the model is rebuilt -- TODO confirm.
reset_graph()

# Everything below is placed on the first GPU.
with tf.device('/GPU:0'):
    with tf.name_scope("inputs"):
        # Flat input rows are reshaped to 4-D for the convolutions.
        X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
        X_reshaped = tf.reshape(X, shape=[-1, height, width, channels])
        y = tf.placeholder(tf.int32, shape=[None], name="y")
        # Defaults to False, so it only needs to be fed to enable training mode.
        training = tf.placeholder_with_default(False, shape=[], name='training')
    
    # Two stacked ReLU convolutions; conv1_* settings are defined outside
    # this fragment.
    conv1 = tf.layers.conv2d(X_reshaped, filters=conv1_fmaps, kernel_size=conv1_ksize,
                             strides=conv1_stride, padding=conv1_pad,
                             activation=tf.nn.relu, name="conv1")

    conv2 = tf.layers.conv2d(conv1, filters=conv2_fmaps, kernel_size=conv2_ksize,
                             strides=conv2_stride, padding=conv2_pad,
                             activation=tf.nn.relu, name="conv2")
    
    # NOTE(review): the fragment ends here; the body of this scope is missing.
    with tf.name_scope("pool3"):
Exemple #37
0
def create_train_op(model_config, inputs, opt, num_gpus=1, histograms=False):
    """Assemble the multi-GPU training op, the model and its summaries.

    One tower is built per GPU via ``tower_loss``; variables are shared
    between towers by switching the current variable scope to reuse mode
    after each tower.  The per-tower gradients are combined with
    ``average_gradients`` and applied by ``opt``, and an exponential moving
    average of the trainable variables is updated alongside.

    Args:
        model_config: forwarded unchanged to ``tower_loss``.
        inputs: forwarded unchanged to ``tower_loss``.
        opt: optimizer providing ``compute_gradients`` / ``apply_gradients``.
        num_gpus: number of ``/gpu:N`` towers to build.
        histograms: when true, also emit histogram summaries for the
            averaged gradients and for every trainable variable.

    Returns:
        A ``(train_op, model, summaries)`` tuple.  ``model`` and the loss
        tensors behind the scalar summaries are the ones returned for the
        last tower that was built.
    """
    with tf.get_default_graph().as_default(), tf.device('/cpu:0'):
        per_tower_grads = []
        model, losses, total_loss = None, [], []
        step_counter = slim.get_or_create_global_step()

        with tf.variable_scope(tf.get_variable_scope()):
            for gpu_idx in xrange(num_gpus):
                tower_name = '%s_%d' % (TOWER_NAME, gpu_idx)
                with tf.device('/gpu:%d' % gpu_idx), \
                        tf.name_scope(tower_name) as scope:
                    # Each call builds a full model replica; the variables
                    # are shared with the earlier towers.
                    losses, total_loss, model = tower_loss(
                        model_config, inputs, scope, is_train=True)
                    # Every tower after this one must reuse the variables.
                    tf.get_variable_scope().reuse_variables()
                    # Gradients of this tower's total loss.
                    per_tower_grads.append(opt.compute_gradients(total_loss))

        # One scalar summary per loss tensor, named after the op as-is
        # (the tower prefix is not stripped from the name).
        summaries = [
            tf.summary.scalar(loss_tensor.op.name, loss_tensor)
            for loss_tensor in losses + [total_loss]
        ]

        # Averaging is the synchronization point across all towers.
        mean_grads = average_gradients(per_tower_grads)

        if histograms:
            summaries.extend(
                tf.summary.histogram(variable.op.name + '/gradients', gradient)
                for gradient, variable in mean_grads
                if gradient is not None)

        # Update the shared variables with the averaged gradients.
        apply_op = opt.apply_gradients(mean_grads, global_step=step_counter)

        if histograms:
            summaries.extend(
                tf.summary.histogram(variable.op.name, variable)
                for variable in tf.trainable_variables())

        # Maintain moving averages of all trainable variables.
        ema = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, step_counter)
        ema_op = ema.apply(tf.trainable_variables())

        # Bundle the gradient update and the moving-average update.
        grouped_updates = tf.group(apply_op, ema_op)

        with tf.name_scope('train_op'):
            # Evaluating the returned tensor forces the updates to run first.
            train_op = with_dependencies([grouped_updates], total_loss)

        return train_op, model, summaries
Exemple #38
0
    def __init__(self, conf, num_quantized_chars):
        self.input_x_d = tf.placeholder(tf.int32, [None, conf.max_char_length_d], name="input_x_d")
        self.input_x_se = tf.placeholder(tf.int32, [None, conf.max_char_length_s], name="input_x_se")

        self.training = tf.placeholder(tf.int32, name="trainable")

        self.list_d_s = [self.input_x_d, self.input_x_se]
        if self.training == 0:
            TRAIN = False
        else:
            TRAIN = True


        self.l2_loss = tf.constant(0.0)
        self.W0 = tf.get_variable("W", [num_quantized_chars, conf.embedding_size],)
        self.all_conv_1 = []
        norm = tf.random_normal_initializer(stddev=0.1)
        for i in range(len(self.list_d_s)):
            with tf.variable_scope('%d' % (i)):
                with tf.device('/cpu:0'), tf.name_scope("embedding"):
                    self.embedded_characters = tf.nn.embedding_lookup(self.W0, self.list_d_s[i])
                    self.embedded_characters_expanded = tf.expand_dims(self.embedded_characters, -1, name="embedding_input")

                with tf.variable_scope('layer_0'):
                    filter_shape0 = [conf.filter_size, conf.embedding_size, 1, 64]
                    strides0 = [1, 1, conf.embedding_size, 1]
                    self.filter_0 = tf.get_variable('filter1', filter_shape0, initializer=norm)
                    self.h0 = Conv(self.embedded_characters_expanded, self.filter_0, strides0, TRAIN, 'layer_0')
                    self.all_conv_1.append(self.h0)
                '''
                with tf.variable_scope('layer_1-2'):
                    self.h1 = Convolutional_Block(self.h0, 64, TRAIN, 'layer_1-2')
                    #self.pooled_1 = tf.nn.max_pool(self.h1, ksize=[1, conf.filter_size, 1, 1], strides=[1, 2, 1, 1], padding='SAME',
                    #                 name="pool1")
                    self.all_conv_1.append(self.h1)
                '''
        with tf.name_scope('att_layer_3-8'):

            #part of domain classificatin
            #attention_1
            A = distance(self.all_conv_1[0], self.all_conv_1[1])
            #print (type(self.h1.shape[3]),type(self.all_conv_1[1].shape[1]))
            A_4_input, B_4_input = attention_process_1(A, self.all_conv_1[0], self.all_conv_1[1],'w1_0', 'w1_1')
            A_4_feature,_, _ = Convolutional_Block(A_4_input,64,None,None,TRAIN, 'a1-cnn')
            B_4_feature,_,_ = Convolutional_Block(B_4_input,64,None,None,TRAIN, 'b1-cnn')
            A_1 = distance(A_4_feature, B_4_feature)
            xs1_conv1_aten, xs2_conv1_aten = attention_process_2(A_1, A_4_feature, B_4_feature, 64)
            self.pooled_2_a = tf.squeeze(tf.squeeze(average_pool(xs1_conv1_aten, 'pooled_a4') ,axis=1), axis=1)
            self.pooled_2_b = tf.squeeze(tf.squeeze(average_pool(xs2_conv1_aten, 'pooled_b4') ,axis=1), axis=1)

            pooled_2_a = max_pool(xs1_conv1_aten, conf.filter_size, "pool2_a")
            pooled_2_b = max_pool(xs2_conv1_aten, conf.filter_size, "pool2_b")


            A_2 = distance(pooled_2_a, pooled_2_b)
            A_6_input, B_6_input = attention_process_1(A_2, pooled_2_a, pooled_2_b, 'w2_0', 'w2_1')
            A_6_feature, _, _ = Convolutional_Block(A_6_input,128,None,None,TRAIN,'a2-cnn')
            B_6_feature, _, _ = Convolutional_Block(B_6_input,128,None,None,TRAIN, 'b2-cnn')
            A_3 = distance(A_6_feature, B_6_feature)
            xs1_conv2_aten, xs2_conv2_aten = attention_process_2(A_3, A_6_feature, B_6_feature, 128)
            self.pooled_2_a = tf.squeeze(tf.squeeze(average_pool(xs1_conv2_aten, 'pooled_a4') ,axis=1), axis=1)
            self.pooled_2_b = tf.squeeze(tf.squeeze(average_pool(xs2_conv2_aten, 'pooled_b4') ,axis=1), axis=1)

            #pooled_3_a = max_pool(xs1_conv2_aten, conf.filter_size, "pool3_a")
            #pooled_3_b = max_pool(xs2_conv2_aten, conf.filter_size, "pool3_b")


            '''
Exemple #39
0
def train():
  """Train CIFAR-10 for a number of steps.

  Builds one model tower per GPU (``FLAGS.num_gpus``), averages the tower
  gradients and applies them with a gradient-descent optimizer (wrapped in
  ``tp.optimizer.AccumGradOptimizer`` when ``FLAGS.iter_size > 1``).  It then
  runs ``FLAGS.max_steps`` training steps, printing progress every 10 steps,
  writing summaries every 100 steps, and saving a checkpoint to
  ``FLAGS.train_dir`` every 1000 steps and on the final step.

  Raises:
    AssertionError: if the fetched loss value is NaN.
  """
  with tf.Graph().as_default(), tf.device('/cpu:0'):
    # Create a variable to count the number of train() calls. This equals the
    # number of batches processed * FLAGS.num_gpus.
    global_step = tf.get_variable(
        'global_step', [],
        initializer=tf.constant_initializer(0), trainable=False)

    # Calculate the learning rate schedule.
    num_batches_per_epoch = (cifar10.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
                             FLAGS.batch_size)
    decay_steps = int(num_batches_per_epoch * cifar10.NUM_EPOCHS_PER_DECAY / FLAGS.num_gpus)

    # Decay the learning rate exponentially based on the number of steps.
    lr = tf.train.exponential_decay(cifar10.INITIAL_LEARNING_RATE,
                                    global_step,
                                    decay_steps,
                                    cifar10.LEARNING_RATE_DECAY_FACTOR,
                                    staircase=True)

    # Create an optimizer that performs gradient descent.
    opt = tf.train.GradientDescentOptimizer(lr)
    # Optionally wrap the optimizer so gradients are accumulated over
    # FLAGS.iter_size steps (semantics are those of AccumGradOptimizer).
    if(FLAGS.iter_size > 1):
      opt = tp.optimizer.AccumGradOptimizer(opt, FLAGS.iter_size)

    # Get images and labels for CIFAR-10.
    images, labels = cifar10.distorted_inputs()
    batch_queue = tf.contrib.slim.prefetch_queue.prefetch_queue(
          [images, labels], capacity=2 * FLAGS.num_gpus)
    # Calculate the gradients for each model tower.
    tower_grads = []
    with tf.variable_scope(tf.get_variable_scope()):
      for i in xrange(FLAGS.num_gpus):
        with tf.device('/gpu:%d' % i):
          with tf.name_scope('%s_%d' % (cifar10.TOWER_NAME, i)) as scope:
            # Dequeues one batch for the GPU
            image_batch, label_batch = batch_queue.dequeue()
            # Calculate the loss for one tower of the CIFAR model. This function
            # constructs the entire CIFAR model but shares the variables across
            # all towers.
            loss = tower_loss(scope, image_batch, label_batch)

            # Reuse variables for the next tower.
            tf.get_variable_scope().reuse_variables()

            # Retain the summaries from the final tower.
            # (`summaries` is overwritten on every iteration, so only the
            # last tower's collection survives the loop.)
            summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)

            # Calculate the gradients for the batch of data on this CIFAR tower.
            grads = opt.compute_gradients(loss)

            # Keep track of the gradients across all towers.
            tower_grads.append(grads)

    # We must calculate the mean of each gradient. Note that this is the
    # synchronization point across all towers.
    grads = average_gradients(tower_grads)

    # Add a summary to track the learning rate.
    summaries.append(tf.summary.scalar('learning_rate', lr))

    # Add histograms for gradients.
    for grad, var in grads:
      if grad is not None:
        summaries.append(tf.summary.histogram(var.op.name + '/gradients', grad))

    # Apply the averaged gradients; this also increments global_step.
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)


    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
      summaries.append(tf.summary.histogram(var.op.name, var))

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        cifar10.MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # Group all updates to into a single train op.
    train_op = tf.group(apply_gradient_op, variables_averages_op)

    # Create a saver.
    saver = tf.train.Saver(tf.global_variables())

    # Build the summary operation from the last tower summaries.
    summary_op = tf.summary.merge(summaries)

    # Build an initialization operation to run below.
    init = tf.global_variables_initializer()

    # Start running operations on the Graph. allow_soft_placement must be set to
    # True to build towers on GPU, as some of the ops do not have GPU
    # implementations.
    config=tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=FLAGS.log_device_placement)

    # only allocate needed memory size
    config.gpu_options.allow_growth=True
    # visible gpu number
    #config.gpu_options.visible_device_list='1,2,3,4'
    # gpu memory usage restriction ratio
    #config.gpu_options.per_process_gpu_memory_fraction=0.9
    # NOTE(review): neither the session nor the summary writer created below
    # is explicitly closed in this function.
    sess = tf.Session(config=config)

    sess.run(init)

    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)

    summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

    for step in xrange(FLAGS.max_steps):
      start_time = time.time()
      # `loss` is the variable left over from the tower loop, i.e. the loss
      # tensor of the last tower that was built.
      _, loss_value = sess.run([train_op, loss])
      duration = time.time() - start_time

      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

      if step % 10 == 0:
        # Throughput figures are derived from the wall-clock time of the
        # single sess.run call above.
        num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus * FLAGS.iter_size
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = duration / (FLAGS.num_gpus * FLAGS.iter_size)

        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)')
        print (format_str % (datetime.now(), step, loss_value,
                             examples_per_sec, sec_per_batch))
        print ('LR:%s' % (sess.run(lr)))
        print ('Global Step:%s' % (sess.run(global_step)))

      if step % 100 == 0:
        # NOTE(review): this is a separate sess.run, so the summaries are
        # evaluated independently of the training step above.
        summary_str = sess.run(summary_op)
        summary_writer.add_summary(summary_str, step)

      # Save the model checkpoint periodically.
      if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
 def _get_target_action(self, vector_input):
     """Return the target actor's action with noise added, clipped to [-1, 1].

     The input is first passed through ``self.cast``; the target actor
     network is evaluated under ``self.device``, while the noise addition
     and the clipping happen outside that device scope.
     """
     casted_input = self.cast(vector_input)
     with tf.device(self.device):
         mu = self.actor_target_net(casted_input)
     noisy_mu = mu + self.action_noise()
     return tf.clip_by_value(noisy_mu, -1, 1)
def alltoall_ring(xs, devices, split_axis, concat_axis):
    """MPI alltoall operation.

    Performance-optimized for a ring of devices.

    Each input tensor is treated as n equal parts along ``split_axis``; part j
    of ``xs[i]`` ends up on device j.  Parts travel around the ring one hop
    per step, roughly half of them "forward" (to device i + 1) and the rest
    "backward" (to device i - 1).

    Args:
      xs: a list of n tf.Tensors
      devices: a list of n strings
      split_axis: an integer
      concat_axis: an integer

    Returns:
      a list of n Tensors
    """
    n = len(xs)
    # With a single device there is nothing to exchange.
    if n == 1:
        return xs
    # set up
    # [target, source]
    parts = [[None] * n for i in xrange(n)]

    def my_split(x, size_splits):
        # Split x along split_axis into len(size_splits) pieces whose sizes
        # are proportional to size_splits.  Uses integer division, so this
        # presumably assumes the axis length is divisible by
        # sum(size_splits) -- TODO confirm with callers.
        total_size = tf.shape(x)[split_axis]
        part_size = total_size // sum(size_splits)
        return tf.split(x, [s * part_size for s in size_splits],
                        axis=split_axis)

    # Number of parts each device sends in each direction; together with the
    # one part it keeps for itself they add up to n.
    forward_message_size = (n - 1) // 2
    backward_message_size = (n - 1) - forward_message_size
    forward_messages = [None] * n
    backward_messages = [None] * n
    # Initial split: on every device carve xs[i] into the locally kept part
    # (index i), the outgoing backward message and the outgoing forward
    # message.
    for i in xrange(n):
        with tf.device(devices[i]):
            if i >= backward_message_size:
                # Layout along split_axis: [a | b | c | d] where b holds the
                # backward_message_size parts just before index i, c is
                # part i, and d + a (wrapping around) go forward.
                a, b, c, d = my_split(xs[i], [
                    i - backward_message_size, backward_message_size, 1,
                    n - i - 1
                ])
                backward_messages[i] = b
                parts[i][i] = c
                forward_messages[i] = tf.concat([d, a], axis=split_axis)
            else:
                # Layout along split_axis: [a | b | c | d] where b is part i,
                # c holds the forward_message_size parts right after it, and
                # d + a (wrapping around) go backward.
                a, b, c, d = my_split(
                    xs[i],
                    [i, 1, forward_message_size, backward_message_size - i])
                backward_messages[i] = tf.concat([d, a], axis=split_axis)
                parts[i][i] = b
                forward_messages[i] = c
    # Ring exchange: at each step every device takes the message of its
    # neighbour, peels off the one part addressed to itself and passes the
    # remainder on in the next step.
    for step in xrange(1,
                       max(forward_message_size, backward_message_size) + 1):
        new_forward_messages = [None] * n
        new_backward_messages = [None] * n
        for i in xrange(n):
            with tf.device(devices[i]):
                if forward_message_size > 0:
                    # The first part of the predecessor's forward message
                    # originated on device (i - step) % n.
                    parts[i][(i - step) %
                             n], new_forward_messages[i] = my_split(
                                 forward_messages[(i - 1) % n],
                                 [1, forward_message_size - 1])
                if backward_message_size > 0:
                    # The last part of the successor's backward message
                    # originated on device (i + step) % n.
                    new_backward_messages[i], parts[i][
                        (i + step) % n] = my_split(
                            backward_messages[(i + 1) % n],
                            [backward_message_size - 1, 1])
        # Each remaining message is now one part shorter.
        forward_message_size -= 1
        backward_message_size -= 1
        forward_messages = new_forward_messages
        backward_messages = new_backward_messages
    # On every device, concatenate the n collected parts (ordered by source
    # device) along concat_axis.
    return mtf.parallel(devices, tf.concat, parts, axis=[concat_axis] * n)
Exemple #42
0
batch_size = 128
embedding_size = 128
skip_window = 1
num_skips = 2
valid_size = 16
valid_window =100
valid_examples = np.random.choice(valid_window, valid_size, replace=False)
num_sampled = 64

graph = tf.Graph()
with graph.as_default():
    train_inputs = tf.placeholder(tf.int32, shape=[batch_size])
    train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])    
    valid_dataset = tf.constant(valid_examples, dtype=tf.int32)
    
    with tf.device('/cpu:0'):
        embeddings = tf.Variable(
            tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)
        )
        
        embed = tf.nn.embedding_lookup(embeddings, train_inputs)
        
        nce_weights = tf.Variable(
            tf.truncated_normal([vocabulary_size, embedding_size],
                               stddev=1.0 / math.sqrt(embedding_size))
        )
        
        nce_biases = tf.Variable(tf.zeros([vocabulary_size]))
        
    loss = tf.reduce_mean(tf.nn.nce_loss(weights=nce_weights,
                                        biases=nce_biases,
Exemple #43
0
    idx = tf.slice(outi, [0, 0, 0], [-1, -1, k])
    val = tf.slice(out, [0, 0, 0], [-1, -1, k])
    print((idx, val))
    #val, idx = tf.nn.top_k(-dist, k=k) # ONLY SUPPORT CPU
    return val, idx


if __name__ == '__main__':
    # Ad-hoc driver: builds the grouping ops on fixed-seed random inputs.
    # `knn` selects between knn_point and query_ball_point for the indices.
    knn = True
    import numpy as np
    import time
    np.random.seed(100)
    # Random float32 inputs of shape (32, 512, 64), (32, 512, 3) and
    # (32, 128, 3).
    pts = np.random.random((32, 512, 64)).astype('float32')
    tmp1 = np.random.random((32, 512, 3)).astype('float32')
    tmp2 = np.random.random((32, 128, 3)).astype('float32')
    # NOTE(review): the ops are pinned to the second GPU ('/gpu:1') --
    # confirm that is intended on single-GPU machines.
    with tf.device('/gpu:1'):
        points = tf.constant(pts)
        xyz1 = tf.constant(tmp1)
        xyz2 = tf.constant(tmp2)
        radius = 0.1
        nsample = 64
        if knn:
            # knn_point returns (val, idx); only the indices are used.
            _, idx = knn_point(nsample, xyz1, xyz2)
            grouped_points = group_point(points, idx)
        else:
            # query_ball_point returns the indices first here.
            idx, _ = query_ball_point(radius, nsample, xyz1, xyz2)
            grouped_points = group_point(points, idx)
            #grouped_points_grad = tf.ones_like(grouped_points)
            #points_grad = tf.gradients(grouped_points, points, grouped_points_grad)
    with tf.Session('') as sess:
        # Start of the timing measurement.
        now = time.time()
Exemple #44
0
    def _build_word_char_embeddings(self):
        """Build the character-CNN token embedding sub-graph.

        Reads ``self.options['char_cnn']`` and
        ``self.options['lstm']['projection_dim']``, looks up character
        embeddings for ``self.ids_placeholder``, applies one convolution +
        max-pool + activation per entry of ``filters``, optionally applies
        highway layers and a linear projection down to ``projection_dim``,
        and stores the result in ``self.embedding``.

        ``options`` contains key ``'char_cnn'``::

            {
                'n_characters': 262,

                # includes the start / end characters
                'max_characters_per_token': 50,

                'filters': [
                    [1, 32],
                    [2, 32],
                    [3, 64],
                    [4, 128],
                    [5, 256],
                    [6, 512],
                    [7, 512]
                ],
                'activation': 'tanh',

                # for the character embedding
                'embedding': {'dim': 16}

                # for highway layers
                # if omitted, then no highway layers
                'n_highway': 2,
            }

        Raises:
            InvalidNumberOfCharacters: if ``n_characters`` is not 262.
            ValueError: if ``activation`` is neither ``'tanh'`` nor
                ``'relu'``.
        """
        projection_dim = self.options['lstm']['projection_dim']

        cnn_options = self.options['char_cnn']
        filters = cnn_options['filters']
        n_filters = sum(f[1] for f in filters)
        max_chars = cnn_options['max_characters_per_token']
        char_embed_dim = cnn_options['embedding']['dim']
        n_chars = cnn_options['n_characters']
        if n_chars != 262:
            raise InvalidNumberOfCharacters(
                "Set n_characters=262 after training see the README.md")
        if cnn_options['activation'] == 'tanh':
            activation = tf.nn.tanh
        elif cnn_options['activation'] == 'relu':
            activation = tf.nn.relu
        else:
            # Fail before any variable is created; otherwise the graph would
            # be left half-built and the error would surface later as an
            # unbound-variable error inside make_convolutions.
            raise ValueError(
                "Unsupported char_cnn activation %r; expected 'tanh' or "
                "'relu'" % (cnn_options['activation'],))

        # the character embeddings
        with tf.device("/cpu:0"):
            self.embedding_weights = tf.get_variable(
                "char_embed", [n_chars, char_embed_dim],
                dtype=DTYPE,
                initializer=tf.random_uniform_initializer(-1.0, 1.0))
            # shape (batch_size, unroll_steps, max_chars, embed_dim)
            self.char_embedding = tf.nn.embedding_lookup(
                self.embedding_weights, self.ids_placeholder)

        # the convolutions
        def make_convolutions(inp):
            with tf.variable_scope('CNN'):
                convolutions = []
                for i, (width, num) in enumerate(filters):
                    if cnn_options['activation'] == 'relu':
                        # He initialization for ReLU activation
                        # with char embeddings init between -1 and 1
                        #w_init = tf.random_normal_initializer(
                        #    mean=0.0,
                        #    stddev=np.sqrt(2.0 / (width * char_embed_dim))
                        #)

                        # Kim et al 2015, +/- 0.05
                        w_init = tf.random_uniform_initializer(minval=-0.05,
                                                               maxval=0.05)
                    elif cnn_options['activation'] == 'tanh':
                        # glorot init
                        w_init = tf.random_normal_initializer(
                            mean=0.0,
                            stddev=np.sqrt(1.0 / (width * char_embed_dim)))
                    w = tf.get_variable("W_cnn_%s" % i,
                                        [1, width, char_embed_dim, num],
                                        initializer=w_init,
                                        dtype=DTYPE)
                    b = tf.get_variable(
                        "b_cnn_%s" % i, [num],
                        dtype=DTYPE,
                        initializer=tf.constant_initializer(0.0))

                    conv = tf.nn.conv2d(
                        inp, w, strides=[1, 1, 1, 1], padding="VALID") + b
                    # now max pool over all valid character positions
                    conv = tf.nn.max_pool(conv,
                                          [1, 1, max_chars - width + 1, 1],
                                          [1, 1, 1, 1], 'VALID')

                    # activation
                    conv = activation(conv)
                    # drop the pooled (now size-1) character axis
                    conv = tf.squeeze(conv, squeeze_dims=[2])

                    convolutions.append(conv)

            # concatenate the outputs of all filters along the last axis
            return tf.concat(convolutions, 2)

        embedding = make_convolutions(self.char_embedding)

        # for highway and projection layers
        n_highway = cnn_options.get('n_highway')
        use_highway = n_highway is not None and n_highway > 0
        use_proj = n_filters != projection_dim

        if use_highway or use_proj:
            #   reshape from (batch_size, n_tokens, dim) to (-1, dim)
            batch_size_n_tokens = tf.shape(embedding)[0:2]
            embedding = tf.reshape(embedding, [-1, n_filters])

        # set up weights for projection
        if use_proj:
            assert n_filters > projection_dim
            with tf.variable_scope('CNN_proj'):
                W_proj_cnn = tf.get_variable(
                    "W_proj", [n_filters, projection_dim],
                    initializer=tf.random_normal_initializer(
                        mean=0.0, stddev=np.sqrt(1.0 / n_filters)),
                    dtype=DTYPE)
                b_proj_cnn = tf.get_variable(
                    "b_proj", [projection_dim],
                    initializer=tf.constant_initializer(0.0),
                    dtype=DTYPE)

        # apply highways layers
        def high(x, ww_carry, bb_carry, ww_tr, bb_tr):
            carry_gate = tf.nn.sigmoid(tf.matmul(x, ww_carry) + bb_carry)
            transform_gate = tf.nn.relu(tf.matmul(x, ww_tr) + bb_tr)
            return carry_gate * transform_gate + (1.0 - carry_gate) * x

        if use_highway:
            highway_dim = n_filters

            for i in range(n_highway):
                with tf.variable_scope('CNN_high_%s' % i):
                    W_carry = tf.get_variable(
                        'W_carry',
                        [highway_dim, highway_dim],
                        # glorot init
                        initializer=tf.random_normal_initializer(
                            mean=0.0, stddev=np.sqrt(1.0 / highway_dim)),
                        dtype=DTYPE)
                    b_carry = tf.get_variable(
                        'b_carry', [highway_dim],
                        initializer=tf.constant_initializer(-2.0),
                        dtype=DTYPE)
                    W_transform = tf.get_variable(
                        'W_transform', [highway_dim, highway_dim],
                        initializer=tf.random_normal_initializer(
                            mean=0.0, stddev=np.sqrt(1.0 / highway_dim)),
                        dtype=DTYPE)
                    b_transform = tf.get_variable(
                        'b_transform', [highway_dim],
                        initializer=tf.constant_initializer(0.0),
                        dtype=DTYPE)

                embedding = high(embedding, W_carry, b_carry, W_transform,
                                 b_transform)

        # finally project down if needed
        if use_proj:
            embedding = tf.matmul(embedding, W_proj_cnn) + b_proj_cnn

        # reshape back to (batch_size, tokens, dim)
        if use_highway or use_proj:
            shp = tf.concat([batch_size_n_tokens, [projection_dim]], axis=0)
            embedding = tf.reshape(embedding, shp)

        # at last assign attributes for remainder of the model
        self.embedding = embedding
Exemple #45
0
    def __init__(self,
                 sequence_length,
                 num_classes,
                 vocab_size,
                 emd_dim,
                 filter_sizes,
                 num_filters,
                 l2_reg_lambda=0.0,
                 batch_size=32,
                 reference_size=16,
                 dropout_keep_prob=.75):
        """Build the discriminator graph, its loss and its training op.

        Input sequences and reference sequences share one embedding matrix
        and one set of convolution filters.  Pooled features go through a
        highway layer and dropout, are L2-normalized, and each input is
        scored by its dot product with the mean normalized reference feature.

        Args:
            sequence_length: number of token ids per sequence.
            num_classes: width of the ``input_y`` placeholder.
            vocab_size: number of rows of the embedding matrix.
            emd_dim: embedding dimension.
            filter_sizes: convolution filter heights, zipped with
                ``num_filters``.
            num_filters: number of filters for each entry of
                ``filter_sizes``.
            l2_reg_lambda: not used anywhere in this method.
            batch_size: fixed first dimension of ``input_x`` / ``input_y``.
            reference_size: fixed first dimension of ``input_ref``; also
                divides the loss.
            dropout_keep_prob: keep probability passed to ``tf.nn.dropout``.
        """
        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.int32, [batch_size, sequence_length],
                                      name="input_x")
        self.input_ref = tf.placeholder(tf.int32,
                                        [reference_size, sequence_length],
                                        name="input_ref")
        self.input_y = tf.placeholder(tf.float32, [batch_size, num_classes],
                                      name="input_y")
        # Stored as a plain Python value (not a placeholder), so the same
        # keep probability is baked into the graph.
        self.dropout_keep_prob = dropout_keep_prob

        # Keeping track of l2 regularization loss (optional)
        # NOTE(review): l2_loss is never updated or added to self.loss below.
        l2_loss = tf.constant(0.0)

        with tf.variable_scope('discriminator'):
            # Embedding layer
            with tf.device('/cpu:0'), tf.name_scope("embedding"):
                self.W = tf.Variable(tf.random_uniform([vocab_size, emd_dim],
                                                       -1.0, 1.0),
                                     name="W")
                self.embedded_chars = tf.nn.embedding_lookup(
                    self.W, self.input_x)
                # Trailing axis of size 1 acts as the conv2d channel axis.
                self.embedded_chars_expanded = tf.expand_dims(
                    self.embedded_chars, -1)
                # The reference batch uses the same embedding matrix.
                self.embedded_chars_ref = tf.nn.embedding_lookup(
                    self.W, self.input_ref)
                self.embedded_chars_expanded_ref = tf.expand_dims(
                    self.embedded_chars_ref, -1)

            # Create a convolution + maxpool layer for each filter size
            pooled_outputs = []
            pooled_outputs_ref = []
            for filter_size, num_filter in zip(filter_sizes, num_filters):
                with tf.name_scope("conv-maxpool-%s" % filter_size):
                    # Convolution Layer
                    filter_shape = [filter_size, emd_dim, 1, num_filter]
                    W = tf.Variable(tf.truncated_normal(filter_shape,
                                                        stddev=0.1),
                                    name="W")
                    b = tf.Variable(tf.constant(0.1, shape=[num_filter]),
                                    name="b")
                    # The same W and b are applied to inputs and references.
                    conv = tf.nn.conv2d(self.embedded_chars_expanded,
                                        W,
                                        strides=[1, 1, 1, 1],
                                        padding="VALID",
                                        name="conv")
                    conv_ref = tf.nn.conv2d(self.embedded_chars_expanded_ref,
                                            W,
                                            strides=[1, 1, 1, 1],
                                            padding="VALID",
                                            name="conv_ref")
                    # Apply nonlinearity
                    h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                    # NOTE(review): here name="relu_ref" is passed to
                    # bias_add rather than to relu (unlike the line above).
                    h_ref = tf.nn.relu(
                        tf.nn.bias_add(conv_ref, b, name="relu_ref"))
                    # Maxpooling over the outputs
                    pooled = tf.nn.max_pool(
                        h,
                        ksize=[1, sequence_length - filter_size + 1, 1, 1],
                        strides=[1, 1, 1, 1],
                        padding='VALID',
                        name="pool")
                    pooled_ref = tf.nn.max_pool(
                        h_ref,
                        ksize=[1, sequence_length - filter_size + 1, 1, 1],
                        strides=[1, 1, 1, 1],
                        padding='VALID',
                        name="pool_ref")
                    pooled_outputs.append(pooled)
                    pooled_outputs_ref.append(pooled_ref)

            # Combine all the pooled features
            num_filters_total = sum(num_filters)
            self.h_pool = tf.concat(pooled_outputs, 3)
            self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

            self.h_pool_ref = tf.concat(pooled_outputs_ref, 3)
            self.h_pool_flat_ref = tf.reshape(self.h_pool_ref,
                                              [-1, num_filters_total])

            # Add highway
            # NOTE(review): both calls pass scope="highway"; whether they
            # share variables depends on the highway() helper -- confirm.
            with tf.name_scope("highway"):
                self.h_highway = highway(self.h_pool_flat,
                                         self.h_pool_flat.get_shape()[1],
                                         1,
                                         0,
                                         scope="highway")
                self.h_highway_ref = highway(
                    self.h_pool_flat_ref,
                    self.h_pool_flat_ref.get_shape()[1],
                    1,
                    0,
                    scope="highway")

            # Add dropout
            with tf.name_scope("dropout"):
                self.h_drop = tf.nn.dropout(self.h_highway,
                                            self.dropout_keep_prob)
                self.h_drop_ref = tf.nn.dropout(self.h_highway_ref,
                                                self.dropout_keep_prob)

            # Final (unnormalized) scores and predictions
            with tf.name_scope("output"):
                # The two string literals below are disabled earlier
                # implementations kept as dead code.
                """
                scores = tf.TensorArray(dtype=tf.float32, size=batch_size, dynamic_size=False, infer_shape=True)
                def rank_recurrence(i, scores):
                    rank_score = get_rank_score(tf.nn.embedding_lookup(self.h_drop, i), self.h_drop_ref)
                    scores = scores.write(i, rank_score)
                    return i + 1, scores
                _, self.scores = control_flow_ops.while_loop(
                    cond=lambda i, _1: i < batch_size,
                    body=rank_recurrence,
                    loop_vars=(tf.constant(0, dtype=tf.int32), scores)
                )
                """
                # NOTE(review): `score` is only referenced inside the
                # disabled code below.
                score = []
                """
                for i in range(batch_size):
                    value = tf.constant(0.0, dtype=tf.float32)
                    for j in range(reference_size):
                        value += cosine_distance(tf.nn.embedding_lookup(self.h_drop, i),
                                                 tf.nn.embedding_lookup(self.h_drop_ref, j))
                    score.append(value) 
                self.scores = tf.stack(score)
                self.scores = tf.reshape(self.scores, [-1])
                """
                # Mean (over the reference batch) of the L2-normalized
                # reference features, kept as a single row.
                self.reference = tf.reduce_mean(tf.nn.l2_normalize(
                    self.h_drop_ref, axis=-1),
                                                axis=0,
                                                keep_dims=True)
                self.feature = tf.nn.l2_normalize(self.h_drop, axis=-1)
                # Dot product of every normalized input feature with the mean
                # reference vector, flattened to one score per input.
                self.scores = tf.reshape(
                    self.feature @ tf.transpose(self.reference, perm=[1, 0]),
                    [-1])
                # The softmax is taken over the flattened score vector.
                self.ypred_for_auc = tf.reshape(tf.nn.softmax(self.scores),
                                                [-1])
                self.log_score = tf.log(self.ypred_for_auc)

            # CalculateMean cross-entropy loss
            with tf.name_scope("loss"):
                # Column 1 of input_y is used as the negative indicator and
                # column 0 as the positive indicator.
                self.neg_vec = tf.nn.embedding_lookup(
                    tf.transpose(self.input_y), 1)
                self.pos_vec = tf.nn.embedding_lookup(
                    tf.transpose(self.input_y), 0)
                losses_minus = self.log_score * self.neg_vec
                losses_posit = self.log_score * self.pos_vec
                # The tf.maximum(..., 1e-5) terms guard against division by
                # zero when a batch has no negative / no positive rows.
                self.loss = (-tf.reduce_sum(losses_minus) /
                             tf.maximum(tf.reduce_sum(self.neg_vec), 1e-5) +
                             tf.reduce_sum(losses_posit) /
                             tf.maximum(tf.reduce_sum(self.pos_vec), 1e-5)
                             ) / reference_size

        # Only variables whose name contains 'discriminator' are trained and
        # saved.
        self.params = [
            param for param in tf.trainable_variables()
            if 'discriminator' in param.name
        ]
        d_optimizer = tf.train.AdamOptimizer(1e-4)
        # NOTE(review): aggregation_method=2 is a raw integer; presumably a
        # tf.AggregationMethod constant -- confirm which one is intended.
        grads_and_vars = d_optimizer.compute_gradients(self.loss,
                                                       self.params,
                                                       aggregation_method=2)
        self.train_op = d_optimizer.apply_gradients(grads_and_vars)
        # Imported locally rather than at module level.
        from ...models.rankgan import SAVING_PATH
        SavableModel.__init__(self, self.params, SAVING_PATH, 'discriminator')
    def get_model(self, num_classes, activation='sigmoid'):
        """Build and compile the multi-input Keras classification model.

        Eight token-id inputs (each paired with a same-length weight input)
        go through one shared embedding layer. Every embedded sequence is
        summarised as an average-pooled vector and a weight-dotted vector;
        selected summaries are concatenated with a 2048-wide image input and
        fed to a single dense output layer.

        Args:
            num_classes: Number of units in the final dense layer.
            activation: Activation of the final dense layer.

        Returns:
            The compiled Keras ``Model``.
        """
        seq_len = opt.max_len
        ngram_len = opt.ngram_max_len
        vocab_size = opt.unigram_hash_size + 1

        with tf.device('/gpu:0'):

            def summarise(embedded, weights, length):
                # Branch 1: dropout -> batch norm -> average over positions.
                pooled = Dropout(rate=0.5)(embedded)
                pooled = BatchNormalization()(pooled)
                pooled = GlobalAveragePooling1D()(pooled)

                # Branch 2: dot the embeddings with the per-position weights
                # along the sequence axis, then flatten and regularise.
                weight_column = Reshape((length, 1))(weights)
                weighted = dot([embedded, weight_column], axes=1)
                weighted = Flatten()(weighted)
                weighted = Dropout(rate=0.5)(weighted)
                weighted = BatchNormalization()(weighted)

                return pooled, weighted

            # One embedding table shared by every token-like input.
            shared_embd = Embedding(vocab_size, opt.embd_size, name='uni_embd')

            def token_inputs(ids_name, weights_name, length):
                # Returns (ids input, embedded ids, weights input).
                ids = Input((length, ), name=ids_name)
                embedded = shared_embd(ids)
                weights = Input((length, ), name=weights_name)
                return ids, embedded, weights

            uni, uni_embd, w_uni = token_inputs("t_uni", "w_uni", seq_len)
            shape, shape_embd, w_shape = token_inputs("shape", "w_shape",
                                                      seq_len)
            noun, noun_embd, w_noun = token_inputs("noun", "w_noun", seq_len)
            bmm, bmm_embd, w_bmm = token_inputs("bmm", "w_bmm", seq_len)
            ngram, ngram_embd, w_ngram = token_inputs("ngram", "w_ngram",
                                                      ngram_len)
            jamo3, jamo_embd3, w_jamo3 = token_inputs("jamo3", "w_jamo3",
                                                      seq_len)
            jamo2, jamo_embd2, w_jamo2 = token_inputs("jamo2", "w_jamo2",
                                                      seq_len)
            jamo1, jamo_embd1, w_jamo1 = token_inputs("jamo1", "w_jamo1",
                                                      seq_len)
            img = Input((2048, ), name="image")

            # The call order below is kept as-is so that auto-generated layer
            # names do not shift.
            uni_avg, uni_dot = summarise(uni_embd, w_uni, seq_len)
            shape_avg, shape_dot = summarise(shape_embd, w_shape, seq_len)
            noun_avg, noun_dot = summarise(noun_embd, w_noun, seq_len)
            ngram_avg, ngram_dot = summarise(ngram_embd, w_ngram, ngram_len)
            # For the jamo and bmm inputs only the weighted branch is used in
            # the concatenation; the pooled branch is built but left unused.
            _jamo_avg3, jamo_dot3 = summarise(jamo_embd3, w_jamo3, seq_len)
            _jamo_avg2, jamo_dot2 = summarise(jamo_embd2, w_jamo2, seq_len)
            _jamo_avg1, jamo_dot1 = summarise(jamo_embd1, w_jamo1, seq_len)
            _bmm_avg, bmm_dot = summarise(bmm_embd, w_bmm, seq_len)

            features = [
                uni_avg, uni_dot, shape_avg, shape_dot, noun_avg, noun_dot,
                ngram_avg, ngram_dot, jamo_dot3, jamo_dot2, jamo_dot1, bmm_dot,
                img
            ]
            hidden = Concatenate()(features)
            hidden = Dropout(rate=0.5)(hidden)
            hidden = BatchNormalization()(hidden)
            hidden = Activation('relu')(hidden)
            outputs = Dense(num_classes, activation=activation)(hidden)

            model_inputs = [
                uni, w_uni, shape, w_shape, noun, w_noun, bmm, w_bmm, ngram,
                w_ngram, jamo3, w_jamo3, jamo2, w_jamo2, jamo1, w_jamo1, img
            ]
            model = Model(inputs=model_inputs, outputs=outputs)
            optm = keras.optimizers.adam(0.0002)

            model.compile(loss='categorical_crossentropy',
                          optimizer=optm,
                          metrics=[top1_acc])

            def log_line(line):
                # Route each summary line to the instance logger.
                self.logger.info(line)

            model.summary(print_fn=log_line)

        return model
    def __init__(self, args, infer=False):
        """Build the recurrent language-model graph.

        Args:
            args: Options object. The attributes read here are ``model``,
                ``num_layers``, ``rnn_size``, ``batch_size``, ``seq_length``,
                ``vocab_size`` and ``grad_clip``.
            infer: When true, ``args.batch_size`` and ``args.seq_length`` are
                overwritten with 1 and the decoder feeds the embedding of its
                own arg-max prediction back in as the next input.

        Raises:
            Exception: If ``args.model`` is not 'rnn', 'gru' or 'lstm'.
        """
        self.args = args
        if infer:
            # NOTE: this mutates the caller's args object.
            args.batch_size = 1
            args.seq_length = 1

        cell_types = {
            'rnn': rnn.BasicRNNCell,
            'gru': rnn.GRUCell,
            'lstm': rnn.BasicLSTMCell,
        }
        if args.model not in cell_types:
            raise Exception("model type not supported: {}".format(args.model))
        cell_fn = cell_types[args.model]

        layers = [cell_fn(args.rnn_size) for _ in range(args.num_layers)]
        stacked_cell = rnn.MultiRNNCell(layers)
        self.cell = stacked_cell

        batch_shape = [args.batch_size, args.seq_length]
        self.input_data = tf.placeholder(tf.int32, batch_shape)
        self.targets = tf.placeholder(tf.int32, batch_shape)
        self.initial_state = stacked_cell.zero_state(args.batch_size,
                                                     tf.float32)

        # Bookkeeping variables kept in the graph (none of them trainable).
        self.batch_pointer = tf.Variable(0,
                                         name="batch_pointer",
                                         trainable=False,
                                         dtype=tf.int32)
        self.inc_batch_pointer_op = tf.assign(self.batch_pointer,
                                              self.batch_pointer + 1)
        self.epoch_pointer = tf.Variable(0,
                                         name="epoch_pointer",
                                         trainable=False)
        self.batch_time = tf.Variable(0.0, name="batch_time", trainable=False)
        tf.summary.scalar("time_batch", self.batch_time)

        def summarise_variable(tensor):
            """Record mean, max and min scalar summaries for ``tensor``."""
            # The stddev and histogram summaries are intentionally left out.
            with tf.name_scope('summaries'):
                tensor_mean = tf.reduce_mean(tensor)
                tf.summary.scalar('mean', tensor_mean)
                tf.summary.scalar('max', tf.reduce_max(tensor))
                tf.summary.scalar('min', tf.reduce_min(tensor))

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("softmax_w",
                                        [args.rnn_size, args.vocab_size])
            summarise_variable(softmax_w)
            softmax_b = tf.get_variable("softmax_b", [args.vocab_size])
            summarise_variable(softmax_b)
            with tf.device("/cpu:0"):
                embedding = tf.get_variable("embedding",
                                            [args.vocab_size, args.rnn_size])
                embedded = tf.nn.embedding_lookup(embedding, self.input_data)
                # One tensor per time step, with the step axis squeezed away.
                inputs = [
                    tf.squeeze(step, [1])
                    for step in tf.split(embedded, args.seq_length, 1)
                ]

        def feed_previous(prev_output, _):
            # Project the previous output to vocabulary logits, pick the best
            # symbol, and return its embedding as the next decoder input.
            prev_logits = tf.matmul(prev_output, softmax_w) + softmax_b
            best_symbol = tf.stop_gradient(tf.argmax(prev_logits, 1))
            return tf.nn.embedding_lookup(embedding, best_symbol)

        decoder_loop = feed_previous if infer else None
        outputs, last_state = legacy_seq2seq.rnn_decoder(
            inputs,
            self.initial_state,
            stacked_cell,
            loop_function=decoder_loop,
            scope='rnnlm')
        flat_output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size])
        self.logits = tf.matmul(flat_output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)

        flat_targets = tf.reshape(self.targets, [-1])
        uniform_weights = tf.ones([args.batch_size * args.seq_length])
        per_example_loss = legacy_seq2seq.sequence_loss_by_example(
            [self.logits], [flat_targets], [uniform_weights], args.vocab_size)
        self.cost = (tf.reduce_sum(per_example_loss) / args.batch_size /
                     args.seq_length)
        tf.summary.scalar("cost", self.cost)
        self.final_state = last_state

        # The learning rate starts at 0.0; this constructor never assigns it.
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        raw_grads = tf.gradients(self.cost, tvars)
        clipped_grads, _ = tf.clip_by_global_norm(raw_grads, args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(clipped_grads, tvars))
Exemple #48
0
def main(unused_argv):
    """Builds the training graph and runs the slim training loop.

    Sets up the model_deploy configuration, creates the input dataset and the
    model clones (via `_build_deeplab`), collects summaries, builds the
    optimizer and the gradient update op, and finally hands the resulting
    train tensor to `slim.learning.train`.

    Args:
        unused_argv: Not referenced in the body.

    Raises:
        AssertionError: If FLAGS.train_batch_size is not divisible by the
            number of clones.
        ValueError: If FLAGS.optimizer is neither 'momentum' nor 'adam', or if
            FLAGS.quantize_delay_step >= 0 while FLAGS.num_clones > 1.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
    config = model_deploy.DeploymentConfig(
        num_clones=FLAGS.num_clones,
        clone_on_cpu=FLAGS.clone_on_cpu,
        replica_id=FLAGS.task,
        num_replicas=FLAGS.num_replicas,
        num_ps_tasks=FLAGS.num_ps_tasks)

    # Split the batch across GPUs.
    assert FLAGS.train_batch_size % config.num_clones == 0, (
        'Training batch size not divisble by number of clones (GPUs).')

    clone_batch_size = FLAGS.train_batch_size // config.num_clones

    tf.gfile.MakeDirs(FLAGS.train_logdir)
    tf.logging.info('Training on %s set', FLAGS.train_split)

    with tf.Graph().as_default() as graph:
        with tf.device(config.inputs_device()):
            dataset = data_generator.Dataset(
                dataset_name=FLAGS.dataset,
                split_name=FLAGS.train_split,
                dataset_dir=FLAGS.dataset_dir,
                batch_size=clone_batch_size,
                crop_size=[int(sz) for sz in FLAGS.train_crop_size],
                min_resize_value=FLAGS.min_resize_value,
                max_resize_value=FLAGS.max_resize_value,
                resize_factor=FLAGS.resize_factor,
                min_scale_factor=FLAGS.min_scale_factor,
                max_scale_factor=FLAGS.max_scale_factor,
                scale_factor_step_size=FLAGS.scale_factor_step_size,
                model_variant=FLAGS.model_variant,
                num_readers=4,
                is_training=True,
                should_shuffle=True,
                should_repeat=True)

        # Create the global step on the device storing the variables.
        with tf.device(config.variables_device()):
            global_step = tf.train.get_or_create_global_step()

            # Define the model and create clones.
            model_fn = _build_deeplab
            model_args = (dataset.get_one_shot_iterator(), {
                common.OUTPUT_TYPE: dataset.num_of_classes
            }, dataset.ignore_label)
            clones = model_deploy.create_clones(
                config, model_fn, args=model_args)

            # Gather update_ops from the first clone. These contain, for example,
            # the updates for the batch_norm variables created by model_fn.
            first_clone_scope = config.clone_scope(0)
            update_ops = tf.get_collection(
                tf.GraphKeys.UPDATE_OPS, first_clone_scope)

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Add summaries for model variables.
        for model_var in tf.model_variables():
            summaries.add(tf.summary.histogram(model_var.op.name, model_var))

        # Add summaries for images, labels, semantic predictions
        if FLAGS.save_summaries_images:
            # Tensors are looked up by name under the first clone's scope.
            # NOTE(review): the strip('/') presumably covers an empty clone
            # scope, which would otherwise leave a leading '/' - confirm.
            summary_image = graph.get_tensor_by_name(
                ('%s/%s:0' % (first_clone_scope, common.IMAGE)).strip('/'))
            summaries.add(
                tf.summary.image('samples/%s' % common.IMAGE, summary_image))

            first_clone_label = graph.get_tensor_by_name(
                ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/'))
            # Scale up summary image pixel values for better visualization.
            pixel_scaling = max(1, 255 // dataset.num_of_classes)
            summary_label = tf.cast(
                first_clone_label * pixel_scaling, tf.uint8)
            summaries.add(
                tf.summary.image('samples/%s' % common.LABEL, summary_label))

            first_clone_output = graph.get_tensor_by_name(
                ('%s/%s:0' % (first_clone_scope, common.OUTPUT_TYPE)).strip('/'))
            # Arg-max over axis 3, then restore a trailing axis of size 1.
            predictions = tf.expand_dims(tf.argmax(first_clone_output, 3), -1)

            summary_predictions = tf.cast(
                predictions * pixel_scaling, tf.uint8)
            summaries.add(
                tf.summary.image(
                    'samples/%s' % common.OUTPUT_TYPE, summary_predictions))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Build the optimizer based on the device specification.
        with tf.device(config.optimizer_device()):
            learning_rate = train_utils.get_model_learning_rate(
                FLAGS.learning_policy,
                FLAGS.base_learning_rate,
                FLAGS.learning_rate_decay_step,
                FLAGS.learning_rate_decay_factor,
                FLAGS.training_number_of_steps,
                FLAGS.learning_power,
                FLAGS.slow_start_step,
                FLAGS.slow_start_learning_rate,
                decay_steps=FLAGS.decay_steps,
                end_learning_rate=FLAGS.end_learning_rate)

            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

            if FLAGS.optimizer == 'momentum':
                optimizer = tf.train.MomentumOptimizer(
                    learning_rate, FLAGS.momentum)
            elif FLAGS.optimizer == 'adam':
                # NOTE: Adam is given FLAGS.adam_learning_rate, not the
                # scheduled `learning_rate` tensor computed above.
                optimizer = tf.train.AdamOptimizer(
                    learning_rate=FLAGS.adam_learning_rate, epsilon=FLAGS.adam_epsilon)
            else:
                raise ValueError('Unknown optimizer')

        # A non-negative quantize_delay_step enables the quantization graph
        # rewrite, which is rejected when more than one clone is requested.
        if FLAGS.quantize_delay_step >= 0:
            if FLAGS.num_clones > 1:
                raise ValueError(
                    'Quantization doesn\'t support multi-clone yet.')
            contrib_quantize.create_training_graph(
                quant_delay=FLAGS.quantize_delay_step)

        # The start-up delay scales linearly with the task index.
        startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps

        with tf.device(config.variables_device()):
            total_loss, grads_and_vars = model_deploy.optimize_clones(
                clones, optimizer)
            total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.')
            summaries.add(tf.summary.scalar('total_loss', total_loss))

            # Modify the gradients for biases and last layer variables.
            last_layers = model.get_extra_layer_scopes(
                FLAGS.last_layers_contain_logits_only)
            grad_mult = train_utils.get_model_gradient_multipliers(
                last_layers, FLAGS.last_layer_gradient_multiplier)
            if grad_mult:
                grads_and_vars = slim.learning.multiply_gradients(
                    grads_and_vars, grad_mult)

            # Create gradient update op.
            grad_updates = optimizer.apply_gradients(
                grads_and_vars, global_step=global_step)
            update_ops.append(grad_updates)
            update_op = tf.group(*update_ops)
            # Evaluating train_tensor first runs every update op and then
            # yields the total loss.
            with tf.control_dependencies([update_op]):
                train_tensor = tf.identity(total_loss, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries))

        # Soft placement allows placing on CPU ops without GPU implementation.
        session_config = tf.ConfigProto(
            allow_soft_placement=True, log_device_placement=False)

        # Start the training.
        # Profiling is enabled only when a profile directory is configured.
        profile_dir = FLAGS.profile_logdir
        if profile_dir is not None:
            tf.gfile.MakeDirs(profile_dir)

        with contrib_tfprof.ProfileContext(
                enabled=profile_dir is not None, profile_dir=profile_dir):
            # init_fn stays None unless an initial checkpoint is given.
            init_fn = None
            if FLAGS.tf_initial_checkpoint:
                init_fn = train_utils.get_model_init_fn(
                    FLAGS.train_logdir,
                    FLAGS.tf_initial_checkpoint,
                    FLAGS.initialize_last_layer,
                    last_layers,
                    ignore_missing_vars=True)

            slim.learning.train(
                train_tensor,
                logdir=FLAGS.train_logdir,
                log_every_n_steps=FLAGS.log_steps,
                master=FLAGS.master,
                number_of_steps=FLAGS.training_number_of_steps,
                is_chief=(FLAGS.task == 0),
                session_config=session_config,
                startup_delay_steps=startup_delay_steps,
                init_fn=init_fn,
                summary_op=summary_op,
                save_summaries_secs=FLAGS.save_summaries_secs,
                save_interval_secs=FLAGS.save_interval_secs)
Exemple #49
0
    tf.float32, [None, 1, 4])  # [batch, num_bbox, (y1, x1, y2, x2)]

# Cut every input tensor into num_gpus pieces, one piece per tower.
image_splits = tf.split(input_image, num_gpus)
ratio_splits = tf.split(input_ratio, num_gpus)
gt_bbox_splits = tf.split(input_gt_bbox, num_gpus)

opt = tf.train.AdamOptimizer(0.001)
global_step = tf.Variable(0, name='global_step', trainable=False)

# Per-tower gradients and losses, appended in tower order.
tower_grads, tower_loss = [], []
counter = 0  # index of the input slice used by the next tower

with tf.variable_scope(tf.get_variable_scope()):
    for d in range(num_gpus):
        with tf.device('/gpu:{}'.format(d)), \
                tf.name_scope('tower_{}'.format(d)):
            loss = build_model(image_splits[counter],
                               ratio_splits[counter],
                               gt_bbox_splits[counter])
            # From here on, get_variable calls in this scope reuse the
            # variables that already exist instead of creating new ones.
            tf.get_variable_scope().reuse_variables()
            counter += 1
            with tf.variable_scope("loss"):
                grads_vars_all = opt.compute_gradients(loss)
                tower_grads.append(grads_vars_all)
                tower_loss.append(loss)

# Average the tower losses and gradients across towers.
mean_loss = tf.reduce_mean(tf.stack(tower_loss, axis=0), 0)
mean_grads = average_gradients(tower_grads)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
Exemple #50
0
                      tokenizer,
                      is_training=False)

    # Load both evaluation JSON files through context managers so the file
    # handles are closed right away instead of being left to the garbage
    # collector.
    with open(args.eval_dir1, 'r') as examples_fp:
        eval_examples = json.load(examples_fp)
    with open(args.eval_dir2, 'r') as data_fp:
        eval_data = json.load(data_fp)

    # Number of batches in one pass over eval_data, rounded up so that a
    # trailing partial batch is counted (the generator below is created with
    # drop_last=False).
    eval_steps_per_epoch = len(eval_data) // (args.n_batch * n_gpu)
    if len(eval_data) % (args.n_batch * n_gpu) != 0:
        eval_steps_per_epoch += 1

    eval_gen = data_generator(eval_data,
                              args.n_batch * n_gpu,
                              shuffle=False,
                              drop_last=False)

    with tf.device("/gpu:0"):
        input_ids = tf.placeholder(tf.int32,
                                   shape=[None, args.max_seq_length],
                                   name='input_ids')
        input_masks = tf.placeholder(tf.float32,
                                     shape=[None, args.max_seq_length],
                                     name='input_masks')
        segment_ids = tf.placeholder(tf.int32,
                                     shape=[None, args.max_seq_length],
                                     name='segment_ids')
        start_positions = tf.placeholder(tf.int32,
                                         shape=[
                                             None,
                                         ],
                                         name='start_positions')
        end_positions = tf.placeholder(tf.int32,
Exemple #51
0
    def build_summaries(self):
        """Create the scalar and image summary ops for this model.

        For each index 0-3 (presumably the output scales - confirm against
        the model definition) a scalar summary is added for the sum of the
        left and right value of every loss term, plus image summaries of both
        disparity estimates. When ``self.params.full_summary`` is set, image
        summaries for the reconstructions, SSIM maps, L1 maps and the two
        input images are added as well. Every summary is placed in
        ``self.model_collection`` and built on the CPU.
        """

        def add_scalar(tag, value):
            tf.summary.scalar(tag, value, collections=self.model_collection)

        def add_image(tag, tensor):
            tf.summary.image(tag,
                             tensor,
                             max_outputs=4,
                             collections=self.model_collection)

        # SUMMARIES
        with tf.device('/cpu:0'):
            for i in range(4):
                suffix = str(i)

                add_scalar('ssim_loss_' + suffix,
                           self.ssim_loss_left[i] + self.ssim_loss_right[i])
                add_scalar('l1_loss_' + suffix,
                           self.l1_reconstruction_loss_left[i] +
                           self.l1_reconstruction_loss_right[i])
                add_scalar('image_loss_' + suffix,
                           self.image_loss_left[i] + self.image_loss_right[i])
                add_scalar('disp_gradient_loss_' + suffix,
                           self.disp_left_loss[i] + self.disp_right_loss[i])
                add_scalar('lr_loss_' + suffix,
                           self.lr_left_loss[i] + self.lr_right_loss[i])

                add_image('disp_left_est_' + suffix, self.disp_left_est[i])
                add_image('disp_right_est_' + suffix, self.disp_right_est[i])

                if self.params.full_summary:
                    add_image('left_est_' + suffix, self.left_est[i])
                    add_image('right_est_' + suffix, self.right_est[i])
                    add_image('ssim_left_' + suffix, self.ssim_left[i])
                    add_image('ssim_right_' + suffix, self.ssim_right[i])
                    add_image('l1_left_' + suffix, self.l1_left[i])
                    add_image('l1_right_' + suffix, self.l1_right[i])

            if self.params.full_summary:
                add_image('left', self.left)
                add_image('right', self.right)

# Careful with GPU memory allocation: TF never releases it, and it starts with
# almost all of the GPU memory allocated.  With the following option the
# allocation instead grows gradually towards that limit:

config.gpu_options.allow_growth = True
sess_grow = tf.Session(config=config)

# The amount of GPU memory used can also be limited with the following option.
# `config` is the same object as above, so allow_growth is still set here.
config.gpu_options.per_process_gpu_memory_fraction = 0.4
sess_limited = tf.Session(config=config)


# How to set placements on multiple devices.
# Here, assume we have three devices: CPU:0, GPU:0, and GPU:1.
if tf.test.is_built_with_cuda():
    with tf.device('/cpu:0'):
        a = tf.constant([1.0, 3.0, 5.0], shape=[1, 3])
        b = tf.constant([2.0, 4.0, 6.0], shape=[3, 1])

        # The GPU indices match the devices assumed above (GPU:0 and GPU:1);
        # the earlier '/gpu:1' and '/gpu:2' needed a third GPU.
        with tf.device('/gpu:0'):
            c = tf.matmul(a, b)  # [1, 3] x [3, 1] -> [1, 1]
            c = tf.reshape(c, [-1])

        with tf.device('/gpu:1'):
            d = tf.matmul(b, a)  # [3, 1] x [1, 3] -> [3, 3]
            flat_d = tf.reshape(d, [-1])

        # tf.mul was removed in TensorFlow 1.0; tf.multiply is its
        # replacement.  The single element of c is broadcast against the nine
        # elements of flat_d.
        combined = tf.multiply(c, flat_d)
    # NOTE(review): `sess` is not created in this section (only sess_grow and
    # sess_limited are) - presumably it is defined earlier; confirm.
    print(sess.run(combined))
Exemple #53
0
def main(argv=None):  # pylint: disable=unused-argument
    """Reset the training directory and launch training on the first GPU.

    Any existing ``FLAGS.train_dir`` is deleted recursively and recreated
    empty before ``train()`` is called, so each run starts from scratch.
    """
    with tf.device('gpu:0'):
        train_dir = FLAGS.train_dir
        already_there = tf.gfile.Exists(train_dir)
        if already_there:
            tf.gfile.DeleteRecursively(train_dir)
        tf.gfile.MakeDirs(train_dir)
        train()
Exemple #54
0
    def __init__(self, config, device, loader, mode):
        """Build the GCN link-prediction graph for one run mode.

        Args:
          config: Hyper-parameter object; the ``*_batch_size``,
            ``train_step_size``, ``hidden_size``, ``learning_rate``,
            ``sgd_opt``, ``node_num``, ``max_degree`` and ``n_layer``
            attributes are read here.
          device: Device string handed to ``tf.device`` for the variables and
            the model ops.
          loader: Data loader; only ``path_file`` and ``embedding_path`` are
            read here.
          mode: ``"Train"``, ``"Valid"``, or anything else (treated as test).
            Non-train modes open their variable scopes with ``reuse=True``,
            so the ``"Train"`` instance has to be constructed first.
        """
        self.config = config
        self.mode = mode
        if mode == "Train":
            self.is_training = True
            self.batch_size = self.config.train_batch_size
            self.maxstep_size = self.config.train_step_size
            reuse = None
        elif mode == "Valid":
            self.is_training = False
            self.batch_size = self.config.valid_batch_size
            reuse = True
        else:
            self.is_training = False
            self.batch_size = self.config.test_batch_size
            reuse = True

        self.hidden_size = hidden_size = config.hidden_size
        # Plain Python value for now; replaced by a non-trainable tf.Variable
        # in the optimizer section below.
        self.learning_rate = config.learning_rate
        opt = config.sgd_opt
        batch_size = self.batch_size
        self.node_num = node_num = config.node_num
        self.max_degree = max_degree = config.max_degree
        self.n_layer = config.n_layer
        self.path = loader.path_file
        self.embedding_path = self.path + loader.embedding_path
        hidden_stdv = np.sqrt(1. / (hidden_size))

        # ------------ weights ------------
        # NOTE(review): the first positional argument of
        # tf.random_normal_initializer is `mean`, not `stddev`, so
        # `hidden_stdv` is used as the mean here.  Left unchanged because
        # fixing it would alter initialisation -- confirm the intent.
        with tf.device(device), tf.name_scope(mode), tf.variable_scope(
                "gnn", reuse=reuse):
            # Fix: the original referenced an undefined `degree_max`; the
            # value read from config is `max_degree`.
            self.W_1 = tf.get_variable(
                name='W_1',
                shape=[max_degree, hidden_size],
                initializer=tf.random_normal_initializer(hidden_stdv),
                trainable=True,
            )
            self.W_2 = tf.get_variable(
                name='W_2',
                shape=[hidden_size, hidden_size],
                initializer=tf.random_normal_initializer(hidden_stdv),
                trainable=True,
            )

        # ------------ feeds ------------
        # Per-edge node indices: source, target and a negative-sample node.
        self.input_x = input_x = tf.placeholder(tf.int32, (batch_size, ))
        self.input_y = input_y = tf.placeholder(tf.int32, (batch_size, ))
        self.negative_sample = negative_sample = tf.placeholder(
            tf.int32, (batch_size, ))
        self.input_adj = input_adj = tf.placeholder(tf.float32,
                                                    (node_num, node_num))
        # Same `degree_max` -> `max_degree` fix as for W_1 above.
        self.feature_h0 = feature_h0 = tf.placeholder(tf.float32,
                                                      (node_num, max_degree))

        # ------------ model and loss ------------
        with tf.device(device), tf.name_scope(mode), tf.variable_scope(
                "DynGCN", reuse=reuse):
            self.final_embedding = self.gcn(input_adj, feature_h0)

            new_embedding_x = tf.nn.embedding_lookup(self.final_embedding,
                                                     input_x)
            new_embedding_y = tf.nn.embedding_lookup(self.final_embedding,
                                                     input_y)
            new_embedding_n = tf.nn.embedding_lookup(self.final_embedding,
                                                     negative_sample)

            # Edge score = mean of the element-wise product of the two
            # endpoint embeddings.
            result = tf.reduce_mean(new_embedding_x * new_embedding_y, axis=1)
            result_n = tf.reduce_mean(new_embedding_x * new_embedding_n,
                                      axis=1)

            # Observed edges are labelled 1, negative samples 0.
            true_xent = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.ones_like(result), logits=result)
            negative_xent = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=tf.zeros_like(result_n), logits=result_n)
            loss = tf.reduce_sum(true_xent) + tf.reduce_sum(negative_xent)
            self.test1 = result
            self.test2 = result_n

        # ------------ evaluation ------------
        self.label_xy = tf.placeholder(tf.int32, (batch_size, ))
        self.prediction = tf.sigmoid(result)
        self.prediction_n = tf.sigmoid(result_n)

        # AUC is only tracked for validation; other modes get no-op handles
        # so the attributes always exist.
        if mode == 'Valid':
            self.auc_result, self.auc_opt = tf.metrics.auc(
                labels=self.label_xy, predictions=self.prediction)
        else:
            self.auc_result = self.auc_opt = tf.no_op()

        self.cost = cost = loss

        # ------------ optimizer ------------
        self.no_opt = tf.no_op()
        self.learning_rate = tf.Variable(config.learning_rate, trainable=False)

        if mode == 'Train':
            self.auc_opt = tf.no_op()
            self.auc_result = tf.no_op()
            # NOTE(review): an unrecognised `config.sgd_opt` leaves
            # `self.optimizer` unset, exactly as in the original code.
            if opt == 'Adam':
                self.optimizer = tf.train.AdamOptimizer(
                    self.learning_rate).minimize(cost)
            elif opt == 'Momentum':
                self.optimizer = tf.train.MomentumOptimizer(
                    self.learning_rate, 0.9).minimize(cost)
            elif opt == 'RMSProp':
                self.optimizer = tf.train.RMSPropOptimizer(
                    self.learning_rate).minimize(cost)
            elif opt == 'Adadelta':
                self.optimizer = tf.train.AdadeltaOptimizer(
                    self.learning_rate).minimize(cost)
        else:
            # Outside training both the optimizer and the cost are no-ops.
            self.optimizer = tf.no_op()
            self.cost = tf.no_op()
Exemple #55
0
            def build_example(label, param_dict_real):
                """Build the model with parameter values set in param_dict_real.

                Builds the TF graph, generates baseline inputs/outputs,
                converts the graph through ``options.tflite_convert_function``
                and, when conversion yields a model, writes the model and its
                example files into the enclosing ``archive``.

                Args:
                  label: Label of the model (i.e. the filename in the zip).
                  param_dict_real: Parameter dictionary (arguments to the
                    factories make_graph and make_test_inputs).

                Returns:
                  (tflite_model_binary, report) where tflite_model_binary is
                  the serialized flatbuffer as a string and report is a
                  dictionary with keys `toco_log` (log of toco conversion),
                  `tf_log` (log of tf conversion), `toco` (a string of success
                  status of the conversion), `tf` (a string success status of
                  the conversion).  tflite_model_binary is None when building
                  the graph or generating the test inputs fails.
                """

                # Re-seed so every example is generated deterministically.
                np.random.seed(RANDOM_SEED)
                # Pessimistic defaults; upgraded below as each stage succeeds.
                report = {"toco": report_lib.NOTRUN, "tf": report_lib.FAILED}

                # Build graph
                report["tf_log"] = ""
                report["toco_log"] = ""
                tf.compat.v1.reset_default_graph()

                with tf.device("/cpu:0"):
                    try:
                        inputs, outputs = make_graph(param_dict_real)
                    except (tf.errors.UnimplementedError,
                            tf.errors.InvalidArgumentError, ValueError):
                        # Graph construction failed: record the traceback and
                        # stop; "toco" stays NOTRUN and "tf" stays FAILED.
                        report["tf_log"] += traceback.format_exc()
                        return None, report

                sess = tf.compat.v1.Session()
                try:
                    baseline_inputs, baseline_outputs = (make_test_inputs(
                        param_dict_real, sess, inputs, outputs))
                except (tf.errors.UnimplementedError,
                        tf.errors.InvalidArgumentError, ValueError):
                    report["tf_log"] += traceback.format_exc()
                    return None, report
                # The TF side worked; conversion counts as failed until the
                # converter call below proves otherwise.
                report["toco"] = report_lib.FAILED
                report["tf"] = report_lib.SUCCESS
                # Convert graph to toco
                # Input descriptors are (name without the ":<index>" suffix,
                # shape, dtype) triples.
                input_tensors = [(input_tensor.name.split(":")[0],
                                  input_tensor.shape, input_tensor.dtype)
                                 for input_tensor in inputs]
                output_tensors = [
                    _normalize_output_name(out.name) for out in outputs
                ]
                # pylint: disable=g-long-ternary
                graph_def = freeze_graph(
                    sess,
                    tf.global_variables() + inputs +
                    outputs) if use_frozen_graph else sess.graph_def

                # Per-example override of the LSTM input-splitting option.
                if "split_tflite_lstm_inputs" in param_dict_real:
                    extra_toco_options.split_tflite_lstm_inputs = param_dict_real[
                        "split_tflite_lstm_inputs"]
                tflite_model_binary, toco_log = options.tflite_convert_function(
                    options,
                    graph_def,
                    input_tensors,
                    output_tensors,
                    extra_toco_options=extra_toco_options,
                    test_params=param_dict_real)
                report["toco"] = (report_lib.SUCCESS if tflite_model_binary
                                  is not None else report_lib.FAILED)
                report["toco_log"] = toco_log

                # Optionally keep the text-format GraphDef next to the model.
                if options.save_graphdefs:
                    archive.writestr(label + ".pbtxt",
                                     text_format.MessageToString(graph_def),
                                     zipfile.ZIP_DEFLATED)

                if tflite_model_binary:
                    if options.make_edgetpu_tests:
                        # Set proper min max values according to input dtype.
                        # This replaces the TF-generated baseline data.
                        baseline_inputs, baseline_outputs = generate_inputs_outputs(
                            tflite_model_binary, min_value=0, max_value=255)
                    archive.writestr(label + ".bin", tflite_model_binary,
                                     zipfile.ZIP_DEFLATED)
                    example = {
                        "inputs": baseline_inputs,
                        "outputs": baseline_outputs
                    }

                    # <label>.inputs holds the example serialized by
                    # write_examples.
                    example_fp = StringIO()
                    write_examples(example_fp, [example])
                    archive.writestr(label + ".inputs", example_fp.getvalue(),
                                     zipfile.ZIP_DEFLATED)

                    # <label>_tests.txt holds the test cases that reference
                    # <label>.bin.
                    example_fp2 = StringIO()
                    write_test_cases(example_fp2, label + ".bin", [example])
                    archive.writestr(label + "_tests.txt",
                                     example_fp2.getvalue(),
                                     zipfile.ZIP_DEFLATED)

                    # Only successfully converted examples are listed in the
                    # manifest.
                    zip_manifest.append(label + "\n")

                return tflite_model_binary, report
    #
    # loadModel = [loadModelAgent1,loadModelAgent2, loadModelAgent3, loadModelAgent4] #indicate where the saved model is
    loadModel = [loadModelAgent1,loadModelAgent2, loadModelAgent3, loadModelAgent4] #indicate where the saved model is
    #
    # loadModel = "" #indicate where the saved model is

    # #Parameters for controling the experiment
    isLogging = False #Logg the experiment

    isPlotting = True #plot the experiment

    plotFrequency = 1000 #plot the plots every X games

    createDataset = True # weather to save the dataset

    saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Random/1000/NewQPlot" # Directory where the experiment will be saved

    metrics = ChefsHatExperimentHandler.runExperiment(numGames=numGames, playersAgents=playersAgents,experimentDescriptor=experimentDescriptor,isLogging=isLogging,isPlotting=isPlotting,plotFrequency = plotFrequency, createDataset=createDataset,saveExperimentsIn=saveExperimentsIn, loadModel=loadModel, rewardFunction=reward)

    print ("Metrics:" + str(metrics))


# Let TensorFlow grow its GPU allocation on demand instead of claiming almost
# all GPU memory up front.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
sess = tf.Session(config=config)

# Hand the session to Keras.  `K` must already be bound to the Keras backend
# (`from keras import backend as K`).
K.set_session(sess)

# Run the whole experiment with ops pinned to the first GPU.
with tf.device('/gpu:0'):
    runModel()
Exemple #57
0
# Python 2 script fragment: print the dataset sizes, then load a frozen graph
# and look up the tensors needed for evaluation.
print cat_num_total
print len(x)
print len(y)

# Evaluation
# ==================================================

# Running statistics, initialised here; the code that updates them is not
# part of the visible fragment.
prob_max=0.0
prob_min=1.0
total_val=0
total_num=len(x)
right_num=0
graph = tf.Graph()
total_right_val=0
with graph.as_default(), tf.device('/cpu:0'):
    # FLAGS.model_dir is opened as a file and parsed as a serialized GraphDef.
    output_graph_def = tf.GraphDef()
    output_graph_path = FLAGS.model_dir
    with open(output_graph_path, 'rb') as f:
        output_graph_def.ParseFromString(f.read())
        # name='' asks for the nodes to be imported without a name prefix.
        _ = tf.import_graph_def(output_graph_def, name='')
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    with sess.as_default():

        tf.initialize_all_variables().run()
        # NOTE(review): the graph was imported with name='' above, yet these
        # lookups use an 'import/' prefix.  They only resolve if the saved
        # node names already start with 'import/' -- confirm against the
        # exported graph.
        input_x = sess.graph.get_tensor_by_name('import/dev_x:0')
        input_y = sess.graph.get_tensor_by_name('import/dev_y:0')
        dropout_keep_prob = sess.graph.get_tensor_by_name('import/dropout_keep_prob:0')
        scores = sess.graph.get_tensor_by_name('import/output/scores:0')
                        "Ep:",
                        GLOBAL_EP,
                        "| Ep_r: %i" % GLOBAL_MEAN_R[-1],
                    )
                    GLOBAL_EP += 1
                    if GLOBAL_MEAN_R[-1] > MAX_R and GLOBAL_MEAN_R[-1] > -350:
                        # saver.save(SESS, 'model_adv/single',global_step=GLOBAL_EP)
                        print("save episode:", GLOBAL_EP)
                        MAX_R = GLOBAL_MEAN_R[-1]
                    break


if __name__ == "__main__":
    SESS = tf.Session()

    with tf.device("/cpu:0"):
        OPT_A = tf.train.RMSPropOptimizer(LR_A, name='RMSPropA')
        OPT_C = tf.train.RMSPropOptimizer(LR_C, name='RMSPropC')
        GLOBAL_AC = ACNet(GLOBAL_NET_SCOPE)  # we only need its params
        workers = []
        # Create worker
        for i in range(N_WORKERS):
            i_name = 'W_%i' % i  # worker name
            workers.append(Worker(i_name, GLOBAL_AC))

    COORD = tf.train.Coordinator()
    saver = tf.train.Saver()
    SESS.run(tf.global_variables_initializer())

    worker_threads = []
    for worker in workers:
Exemple #59
0
def try_all_gpus():
    """Return one ``tf.device`` context per physical GPU.

    When TensorFlow reports no GPU, a single-element list holding the
    ``/CPU:0`` device context is returned instead, so the result is never
    empty.
    """
    gpu_count = len(tf.config.experimental.list_physical_devices('GPU'))
    gpu_contexts = [tf.device(f'/GPU:{idx}') for idx in range(gpu_count)]
    if gpu_contexts:
        return gpu_contexts
    return [tf.device('/CPU:0')]
Exemple #60
0
def train(options,
          data,
          n_gpus,
          tf_save_dir,
          tf_log_dir,
          logger,
          restart_ckpt_file=None):
    """Train a SentenceLanguageModel with one model replica per GPU.

    One replica is built per GPU with shared variables; the per-tower
    gradients are averaged, clipped and applied with Adagrad.  Summaries are
    written to ``tf_log_dir`` and checkpoints to ``tf_save_dir``.

    Args:
      options: Mapping of hyper-parameters.  Keys read here: 'learning_rate'
        (optional, default 0.2), 'unroll_steps', 'batch_size',
        'bidirectional' (optional), 'n_epochs', 'log_interval',
        'checkpoint_interval', and the presence of 'char_cnn'.
      data: Dataset object providing ``iter_batches(batch_size, unroll_steps)``.
      n_gpus: Number of GPUs, i.e. of model replicas.
      tf_save_dir: Directory that receives the checkpoints.
      tf_log_dir: Directory that receives the TensorBoard summaries.
      logger: Logger used for progress messages.
      restart_ckpt_file: Optional checkpoint to restore before training.

    NOTE(review): no return statement is visible in this part of the file;
    the function appears to be used for its side effects only.
    """
    # Build the shared state (step counter, optimizer, summaries) on the CPU.
    with tf.device('/cpu:0'):
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        # set up the optimizer
        lr = options.get('learning_rate', 0.2)
        opt = tf.train.AdagradOptimizer(learning_rate=lr,
                                        initial_accumulator_value=1.0)

        # calculate the gradients on each GPU
        tower_grads = []
        models = []
        train_perplexity = tf.get_variable(
            'train_perplexity', [],
            initializer=tf.constant_initializer(0.0),
            trainable=False)
        norm_summaries = []
        for k in range(n_gpus):
            with tf.device('/gpu:%d' % k):
                # Every tower after the first reuses the first tower's
                # variables.
                with tf.variable_scope('lm', reuse=k > 0):
                    # calculate the loss for one model replica and get
                    #   lstm states
                    model = SentenceLanguageModel(options, True)
                    loss = model.total_loss
                    models.append(model)
                    # get gradients
                    grads = opt.compute_gradients(
                        loss * options['unroll_steps'],
                        aggregation_method=\
                            tf.AggregationMethod.EXPERIMENTAL_TREE,
                    )
                    tower_grads.append(grads)
                    # keep track of loss across all GPUs
                    train_perplexity += loss

        print_variable_summary()

        # calculate the mean of each gradient across all GPUs
        grads = average_gradients(tower_grads, options['batch_size'], options)
        grads, norm_summary_ops = clip_grads(grads, options, True, global_step)
        norm_summaries.extend(norm_summary_ops)

        # log the training perplexity
        # (exp of the loss summed over towers, divided by the tower count)
        train_perplexity = tf.exp(train_perplexity / n_gpus)
        perplexity_summmary = tf.summary.scalar('train_perplexity',
                                                train_perplexity)

        # some histogram summaries.  all models use the same parameters
        # so only need to summarize one
        histogram_summaries = [
            tf.summary.histogram('token_embedding', models[0].embedding)
        ]
        # tensors of the output from the LSTM layer
        lstm_out = tf.get_collection('lstm_output_embeddings')
        histogram_summaries.append(
            tf.summary.histogram('lstm_embedding_0', lstm_out[0]))
        if options.get('bidirectional', False):
            # also have the backward embedding
            histogram_summaries.append(
                tf.summary.histogram('lstm_embedding_1', lstm_out[1]))

        # apply the gradients to create the training operation
        train_op = opt.apply_gradients(grads, global_step=global_step)

        # histograms of variables
        for v in tf.global_variables():
            histogram_summaries.append(\
                tf.summary.histogram(v.name.replace(":", "_"), v))

        # get the gradient updates -- these aren't histograms, but we'll
        # only update them when histograms are computed
        histogram_summaries.extend(summary_gradient_updates(grads, opt, lr))

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)
        summary_op = tf.summary.merge([perplexity_summmary] + norm_summaries)
        hist_summary_op = tf.summary.merge(histogram_summaries)

        init = tf.initializers.global_variables()

    # do the training loop
    bidirectional = options.get('bidirectional', False)
    # Soft placement lets ops without a kernel for their requested device run
    # elsewhere; allow_growth avoids claiming all GPU memory up front.
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        sess.run(init)

        # load the checkpoint data if needed
        if restart_ckpt_file is not None:
            loader = tf.train.Saver()
            loader.restore(sess, restart_ckpt_file)

        summary_writer = tf.summary.FileWriter(tf_log_dir, sess.graph)

        # For each batch:
        # Get a batch of data from the generator. The generator will
        # yield batches of size batch_size * n_gpus that are sliced
        # and fed for each required placeholder.

        batch_size = options['batch_size']
        unroll_steps = options['unroll_steps']
        epochs = options['n_epochs']
        log_interval = options['log_interval']
        checkpoint_interval = options['checkpoint_interval']

        char_inputs = 'char_cnn' in options
        logger.info('Start training loop')

        t1 = time.time()
        for epoch in range(epochs):
            # batch_no restarts at 1 in every epoch.
            data_gen = data.iter_batches(batch_size * n_gpus, unroll_steps)
            for batch_no, batch in enumerate(data_gen, start=1):
                # slice the input in the batch for the feed_dict
                X = batch
                feed_dict = {}
                for k in range(n_gpus):
                    model = models[k]
                    start = k * batch_size
                    end = (k + 1) * batch_size

                    feed_dict.update(
                        _get_feed_dict_from_X(X, start, end, model,
                                              char_inputs, bidirectional))

                if batch_no % checkpoint_interval != 0:
                    ret = sess.run([train_op, summary_op, train_perplexity],
                                   feed_dict=feed_dict)
                else:
                    # also run the histogram summaries
                    ret = sess.run([
                        train_op, summary_op, train_perplexity, hist_summary_op
                    ],
                                   feed_dict=feed_dict)

                # ret[3] (the histogram summary) only exists on checkpoint
                # batches, matching the branch above.
                if batch_no % checkpoint_interval == 0:
                    summary_writer.add_summary(ret[3], batch_no)
                if batch_no % log_interval == 0:
                    # write the summaries to tensorboard and display perplexity
                    summary_writer.add_summary(ret[1], batch_no)
                    logger.info(f'Epoch {epoch} | Batch {batch_no} | '
                                f'train_perplexity={ret[2]}')
                    logger.info(f'Total time: {time.time() - t1}')

                if batch_no % checkpoint_interval == 0:
                    # save the model
                    checkpoint_path = os.path.join(tf_save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=global_step)

            # End of epoch: save an additional, epoch-named checkpoint.
            checkpoint_path = os.path.join(tf_save_dir,
                                           f'model_epoch{epoch:02d}.ckpt')
            saver.save(sess, checkpoint_path, global_step=global_step)