def get_run_op():
    tower_grads = []
    devices = get_all_devices()
    losses = []
    if FLAGS.num_workers == 1:
        devices = []
        for i in range(FLAGS.phy_blocks):
            devices.append('/gpu:0')
            with tf.device('/gpu:0'):
                data, labels = fake_data(FLAGS.batch_size, 1)
                weights = initialize_weights()
                logit = inference(data, weights)
                _loss = loss(logit, labels)
                opt = tf.train.GradientDescentOptimizer(learning_rate=0.5,
                                                        name=("opt%d" % i))
                tower_grads.append(opt.compute_gradients(_loss, weights))
    else:
        for i in range(FLAGS.num_workers):
            with tf.device('/gpu:%d' % i):
                data, labels = fake_data(FLAGS.batch_size, 1)
                weights = initialize_weights()
                logit = inference(data, weights)
                _loss = loss(logit, labels)
                opt = tf.train.GradientDescentOptimizer(learning_rate=0.5,
                                                        name=("opt%d" % i))
                tower_grads.append(opt.compute_gradients(_loss, weights))
    return aggregrate_gradients(tower_grads, devices)
def get_run_op():
    # Create an optimizer that performs gradient descent.
    #opt = tf.train.GradientDescentOptimizer(learning_rate=0.01)
    slice_size = FLAGS.batch_size / FLAGS.num_cuts
    print('Slice size:{}'.format(slice_size))
    data = None
    label = None
    last_fc = [tf.no_op()]
    with tf.device('/gpu:0'):
        data = tf.get_variable(
            name='data',
            shape=[slice_size, FLAGS.hidden_size],
            trainable=False)
        '''
        label = tf.get_variable(
            name='label',
            shape=[slice_size, FLAGS.hidden_size],
            trainable=False)
        with tf.variable_scope('fc_in'):
            weight_in = tf.zeros([1000, FLAGS.hidden_size])
            for k in xrange(FLAGS.num_cuts):
                with tf.control_dependencies([last_fc[-1]]):
                    last_fc.append(tf.matmul(data[k+1], weight_in))
        '''
        for i in xrange(FLAGS.num_cuts):
            last_fc.append(data)
    for i in xrange(FLAGS.num_layers):
        dev = '/gpu:%d' % (i * FLAGS.num_gpus / FLAGS.num_layers)
        with tf.device(dev), scopes.arg_scope([variables.variable], device=dev):
            tmp_fc = [tf.no_op()]
            with tf.variable_scope('fc%d' % i):
                w = tf.get_variable(
                    name='w',
                    shape=[FLAGS.hidden_size, FLAGS.hidden_size],
                    trainable=True)
                for k in xrange(FLAGS.num_cuts):
                    with tf.control_dependencies([tmp_fc[-1]]):
                        tmp_fc.append(tf.matmul(last_fc[k+1], w))
            last_fc = tmp_fc
            if i == FLAGS.num_layers - 1:
                with tf.control_dependencies(last_fc):
                    train_op = tf.no_op()
    '''
    with tf.device('/gpu:%d' % (FLAGS.num_gpus - 1)):
        tmp_fc = [tf.no_op()]
        with tf.variable_scope('fc_out'):
            weight_out = tf.zeros([FLAGS.hidden_size, 1000])
            for k in xrange(FLAGS.num_cuts):
                with tf.control_dependencies([tmp_fc[-1]]):
                    tmp_fc.append(tf.matmul(last_fc[k+1], weight_out))
        last_fc = tmp_fc
    loss = tf.nn.softmax_cross_entropy_with_logits(last_fc, labels,
                                                   name='xentropy')
    grads = opt.compute_gradients(loss)
    apply_gradient_op = opt.apply_gradients(grads)
    train_op = tf.group(apply_gradient_op)
    '''
    init_op = tf.initialize_all_variables()
    return init_op, train_op
def __init__(self, planes, args, phase=1, filters=192, board_size=15,
             model_dir="./value_net_models", model_file=None, device="gpu",
             gpu=1, optimizer="sgd", learn_rate=1e-6, distributed_train=False):
    self.board_size = board_size
    self.phase = phase
    self.planes = planes
    # init network
    if distributed_train:
        ps_device = "/job:ps/task:0/cpu:0"
        worker_device = "/job:worker/task:%d/gpu:%d" % (args.task_index,
                                                        args.gpu_id)
    else:
        ps_device = "/cpu:0"
        if device == "cpu":
            worker_device = "/cpu:0"
        else:
            worker_device = "/gpu:%d" % gpu
    self.tf_var = dict()
    self.tf_var["in"], self.tf_var["out"] = AI_net.create_value_network(
        planes, ps_device, worker_device,
        filters=filters, board_size=self.board_size, name_prefix="value_net")
    # super init
    AI_net.SuperNetwork.__init__(self, model_dir=model_dir)
    history_step = int(self.param_unserierlize(
        init_params={"global_step": 0})["global_step"])
    with tf.device(ps_device):
        self.global_step = tf.Variable(history_step)
    # loss function
    with tf.device(worker_device):
        self.loss_function(optimizer, learn_rate, args.values_net_batch_size)
def __init__(self,
             remote_device,
             local_device,
             top_delta_size=64,
             top_delta_layers=2,
             compute_h_size=64,
             compute_h_layers=1,
             delta_dim=32,
             num_grad_channels=4,
             normalize_epsilon=1.):
    self.local_device = local_device
    self.remote_device = remote_device
    self.top_delta_size = top_delta_size
    self.top_delta_layers = top_delta_layers
    self.compute_h_size = compute_h_size
    self.compute_h_layers = compute_h_layers
    self.delta_dim = delta_dim
    self.num_grad_channels = num_grad_channels
    self.normalize_epsilon = normalize_epsilon

    with tf.device(local_device):
        self.opt = optimizers.UnrollableGradientDescentRollingOptimizer(
            learning_rate=1e-4)

    # lazily initialized for readouts
    self.readout_mods = {}

    super(MoreLocalWeightUpdateProcess,
          self).__init__(name='MoreLocalWeightUpdateProcess')

    with tf.device(remote_device):
        self()
def __init__(self, session, np_matrix, rank, learning_rate=0.1):
    matrix = tf.constant(np_matrix, dtype=tf.float32)
    scale = 2 * np.sqrt(np_matrix.mean() / rank)
    initializer = tf.random_uniform_initializer(maxval=scale)

    with tf.device('/job:ps/task:0'):
        self.matrix_W = tf.get_variable(
            "W", (np_matrix.shape[0], rank), initializer=initializer)
    with tf.device("/job:ps/task:1"):
        self.matrix_H = tf.get_variable(
            "H", (rank, np_matrix.shape[1]), initializer=initializer)

    matrix_WH = tf.matmul(self.matrix_W, self.matrix_H)
    f_norm = tf.reduce_sum(tf.pow(matrix - matrix_WH, 2))

    # Non-negativity penalty: |w| - w is 0 for w >= 0 and 2|w| for w < 0,
    # so the INFINITY multiplier pushes W and H toward non-negative entries.
    nn_w = tf.reduce_sum(tf.abs(self.matrix_W) - self.matrix_W)
    nn_h = tf.reduce_sum(tf.abs(self.matrix_H) - self.matrix_H)
    constraint = INFINITY * (nn_w + nn_h)

    self.loss = f_norm + constraint
    self.constraint = constraint
    self.session = session
    self.optimizer = tf.train.GradientDescentOptimizer(
        learning_rate).minimize(self.loss)
def testHandleDeletion(self):
    if not tf.test.is_built_with_cuda():
        return True
    if not self.haveGpu0():
        return True

    dtype = tf.float32
    config = tf.ConfigProto(log_device_placement=True)
    sess = tf.Session(config=config)

    # initial values live on CPU
    with tf.device("/cpu:0"):
        one = tf.constant(1, dtype=dtype)
        one_handle = sess.run(tf.get_session_handle(one))
        x_handle = sess.run(tf.get_session_handle(one))

    # addition lives on GPU
    with tf.device("/gpu:0"):
        add_holder1, add_tensor1 = tf.get_session_tensor(one_handle.handle,
                                                         dtype)
        add_holder2, add_tensor2 = tf.get_session_tensor(one_handle.handle,
                                                         dtype)
        add_op = tf.add(add_tensor1, add_tensor2)
        add_output = tf.get_session_handle(add_op)

    # add 1 to tensor 20 times to exceed _DEAD_HANDLES_THRESHOLD
    for _ in range(20):
        x_handle = sess.run(add_output,
                            feed_dict={add_holder1: one_handle.handle,
                                       add_holder2: x_handle.handle})
def _apply_drop_path(self, net):
    """Apply drop_path regularization to net."""
    drop_path_keep_prob = self._drop_path_keep_prob
    if drop_path_keep_prob < 1.0:
        # Scale keep prob by layer number
        assert self._cell_num != -1
        # The added 2 is for the reduction cells
        num_cells = self._total_num_cells
        layer_ratio = (self._cell_num + 1) / float(num_cells)
        with tf.device('/cpu:0'):
            tf.summary.scalar('layer_ratio', layer_ratio)
        drop_path_keep_prob = 1 - layer_ratio * (1 - drop_path_keep_prob)
        # Decrease the keep probability over time
        current_step = tf.cast(tf.train.get_or_create_global_step(),
                               tf.float32)
        drop_path_burn_in_steps = self._total_training_steps
        current_ratio = current_step / drop_path_burn_in_steps
        current_ratio = tf.minimum(1.0, current_ratio)
        with tf.device('/cpu:0'):
            tf.summary.scalar('current_ratio', current_ratio)
        drop_path_keep_prob = (1 - current_ratio * (1 - drop_path_keep_prob))
        with tf.device('/cpu:0'):
            tf.summary.scalar('drop_path_keep_prob', drop_path_keep_prob)
        net = drop_path(net, drop_path_keep_prob)
    return net
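# Worked example (assumed values, not from the code above): with
# drop_path_keep_prob = 0.6, cell_num = 3 and total_num_cells = 8,
# layer_ratio = (3 + 1) / 8 = 0.5, so the layer-scaled keep prob is
# 1 - 0.5 * (1 - 0.6) = 0.8; halfway through training (current_ratio = 0.5)
# the final keep prob becomes 1 - 0.5 * (1 - 0.8) = 0.9.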
def testColocation(self):
    with tf.device("/job:ps"):
        var = tf.Variable(0, name="v")
    with tf.device("/job:worker/task:7"):
        assign_op = var.assign(1)
    self.assertDeviceEqual("/job:ps", assign_op.device)
    self.assertEqual([b"loc:@v"], assign_op.op.colocation_groups())
def testReturnsSingleCheckpointIfOneShardedCheckpoint(self):
    checkpoint_dir = os.path.join(self.get_temp_dir(),
                                  'one_checkpoint_found_sharded')
    if not tf.gfile.Exists(checkpoint_dir):
        tf.gfile.MakeDirs(checkpoint_dir)

    global_step = tf.contrib.framework.get_or_create_global_step()

    # This will result in 3 different checkpoint shard files.
    with tf.device('/cpu:0'):
        tf.Variable(10, name='v0')
    with tf.device('/cpu:1'):
        tf.Variable(20, name='v1')
    saver = tf.train.Saver(sharded=True)

    with tf.Session(
            target='',
            config=tf.ConfigProto(device_count={'CPU': 2})) as session:
        session.run(tf.initialize_all_variables())
        save_path = os.path.join(checkpoint_dir, 'model.ckpt')
        saver.save(session, save_path, global_step=global_step)

    num_found = 0
    for _ in tf.contrib.training.checkpoints_iterator(checkpoint_dir,
                                                      timeout=0):
        num_found += 1
    self.assertEqual(num_found, 1)
def _model_fn(features, labels, mode, params):
    model_fn = MODELS[FLAGS.model].model_fn
    global_step = tf.train.get_or_create_global_step()

    if FLAGS.num_gpus > 0 and mode == learn.ModeKeys.TRAIN:
        split_features = {k: tf.split(v, FLAGS.num_gpus)
                          for k, v in features.iteritems()}
        split_labels = {k: tf.split(v, FLAGS.num_gpus)
                        for k, v in labels.iteritems()}
        grads = []
        predictions = collections.defaultdict(list)
        losses = []

        opt = ops.create_optimizer(
            params.optimizer, params.learning_rate, params.decay_steps)

        for i in range(FLAGS.num_gpus):
            with tf.device(tf.DeviceSpec(device_type='GPU', device_index=i)):
                with tf.name_scope('tower_%d' % i):
                    with tf.variable_scope(tf.get_variable_scope(),
                                           reuse=i > 0):
                        device_features = {
                            k: v[i] for k, v in split_features.iteritems()}
                        device_labels = {
                            k: v[i] for k, v in split_labels.iteritems()}

                        device_predictions, device_loss = model_fn(
                            device_features, device_labels, mode, params)

                        for k, v in device_predictions.iteritems():
                            predictions[k].append(v)

                        if device_loss is not None:
                            losses.append(device_loss)

                        device_grads = opt.compute_gradients(device_loss)
                        grads.append(device_grads)

        grads = ops.average_gradients(grads)
        train_op = opt.apply_gradients(grads, global_step=global_step)

        for k, v in predictions.iteritems():
            predictions[k] = tf.concat(v, axis=0)

        loss = tf.add_n(losses) if losses else None
    else:
        with tf.device(tf.DeviceSpec(device_type='GPU', device_index=0)):
            predictions, loss = model_fn(features, labels, mode, params)

            train_op = None
            if mode == learn.ModeKeys.TRAIN:
                opt = ops.create_optimizer(
                    params.optimizer, params.learning_rate,
                    params.decay_steps)
                train_op = opt.minimize(loss, global_step=global_step)

    tf.summary.scalar('loss/loss', loss)

    return tf.contrib.learn.ModelFnOps(
        mode=mode, predictions=predictions, loss=loss, train_op=train_op)
def testAnalysisAndAllocations(self):
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    config = tf.ConfigProto(device_count={'CPU': 3})

    with tf.Session(config=config) as sess:
        with tf.device('/cpu:0'):
            const1 = tf.constant(1.0, name='const1')
        with tf.device('/cpu:1'):
            const2 = tf.constant(2.0, name='const2')
        with tf.device('/cpu:2'):
            result = const1 + const2 + const1 * const2
        sess.run(result, options=run_options, run_metadata=run_metadata)

    self.assertTrue(run_metadata.HasField('step_stats'))
    tl = timeline.Timeline(run_metadata.step_stats)
    step_analysis = tl.analyze_step_stats()
    ctf = step_analysis.chrome_trace.format_to_string()
    self._validateTrace(ctf)
    maximums = step_analysis.allocator_maximums
    self.assertTrue('cpu' in maximums)
    cpu_max = maximums['cpu']
    # At least const1 + const2, both float32s (4 bytes each)
    self.assertGreater(cpu_max.num_bytes, 8)
    self.assertGreater(cpu_max.timestamp, 0)
    self.assertTrue('const1' in cpu_max.tensors)
    self.assertTrue('const2' in cpu_max.tensors)
def test_single_output(self):
    print('*** Running Test: ' + self.__class__.__name__ +
          ' function: ' + _getframe().f_code.co_name)

    class AddOp(Operator):
        def op(self, x, y):
            pos = position_in(x.shape)
            out = output_like(x)
            out[pos] = x[pos] + y[pos]
            return out

    in0 = np.random.random(5).astype(np.float32)
    in1 = np.random.random(5).astype(np.float32)
    reference = 4 * (in0 + in1) * (in0 + in1)

    with tf.Session() as sess:
        with tf.device('/cpu:0'):
            a = in0 * 2
            b = in1 * 2
            c = AddOp(a, b, clear_cache=True).as_tensorflow()
            squared = tf.square(c)
        if cuda_enabled:
            with tf.device('/gpu:0'):
                a_gpu = in0 * 2
                b_gpu = in1 * 2
                c_gpu = AddOp(a_gpu, b_gpu).as_tensorflow()
                squared_gpu = tf.square(c_gpu)
            result, result_gpu = sess.run([squared, squared_gpu])
            assert np.allclose(reference, result_gpu)
        else:
            result = sess.run([squared])
        assert np.allclose(reference, result)
def extract_features(ids, path, output_path, extractor, batch_size=64):
    images_names = dict()
    for p in listdir(path):
        image_id = int(p.split('_')[-1].split('.')[0])
        if image_id in ids:
            images_names[image_id] = p

    batch, names = [], []
    with open(output_path, 'w') as output_file:
        for idx, n in enumerate(images_names):
            p = join(path, images_names[n])
            batch.append(load_image(p))
            names.append(n)
            if len(batch) == batch_size:
                batch = np.stack(batch)
                feed_dict = {images: batch}
                with tf.device('/gpu:0'):
                    features = sess.run(extractor, feed_dict=feed_dict)
                for n, f in zip(names, features):
                    output_file.write("%s;%s\n"
                                      % (n, " ".join(str(x) for x in f)))
                print("%d/%d" % (idx, len(images_names)))
                batch, names = [], []
                output_file.flush()
        if len(batch) > 0:
            batch = np.stack(batch)
            feed_dict = {images: batch}
            with tf.device('/gpu:0'):
                features = sess.run(extractor, feed_dict=feed_dict)
            for n, f in zip(names, features):
                output_file.write("%s;%s\n"
                                  % (n, " ".join(str(x) for x in f)))
            print("%d/%d" % (idx, len(images_names)))
            output_file.flush()
def testManyCPUs(self):
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    config = tf.ConfigProto(device_count={'CPU': 3})

    with tf.Session(config=config) as sess:
        with tf.device('/cpu:0'):
            const1 = tf.constant(1.0, name='const1')
        with tf.device('/cpu:1'):
            const2 = tf.constant(2.0, name='const2')
        with tf.device('/cpu:2'):
            result = const1 + const2 + const1 * const2
        sess.run(result, options=run_options, run_metadata=run_metadata)

    self.assertTrue(run_metadata.HasField('step_stats'))
    step_stats = run_metadata.step_stats
    devices = [d.device for d in step_stats.dev_stats]
    self.assertTrue('/job:localhost/replica:0/task:0/cpu:0' in devices)
    self.assertTrue('/job:localhost/replica:0/task:0/cpu:1' in devices)
    self.assertTrue('/job:localhost/replica:0/task:0/cpu:2' in devices)

    tl = timeline.Timeline(step_stats)
    ctf = tl.generate_chrome_trace_format()
    self._validateTrace(ctf)

    tl = timeline.Timeline(step_stats)
    ctf = tl.generate_chrome_trace_format(show_dataflow=False)
    self._validateTrace(ctf)

    tl = timeline.Timeline(step_stats)
    ctf = tl.generate_chrome_trace_format(show_memory=False)
    self._validateTrace(ctf)

    tl = timeline.Timeline(step_stats)
    ctf = tl.generate_chrome_trace_format(show_memory=False,
                                          show_dataflow=False)
    self._validateTrace(ctf)
def pack_range(key, packing, grad_vars, rng):
    """Form the concatenation of a specified range of gradient tensors.

    Args:
      key: Value under which to store meta-data in packing that will be used
        later to restore the grad_var list structure.
      packing: Dict holding data describing packed ranges of small tensors.
      grad_vars: List of (grad, var) pairs for one tower.
      rng: A pair of integers giving the first, last indices of a consecutive
        range of tensors to be packed.

    Returns:
      A tensor that is the concatenation of all the specified small tensors.
    """
    to_pack = grad_vars[rng[0]:rng[1] + 1]
    members = []
    variables = []
    restore_shapes = []
    with tf.name_scope('pack'):
        for g, v in to_pack:
            variables.append(v)
            restore_shapes.append(g.shape)
            with tf.device(g.device):
                members.append(tf.reshape(g, [-1]))
        packing[key] = GradPackTuple(
            indices=range(rng[0], rng[1] + 1),
            vars=variables,
            shapes=restore_shapes)
        with tf.device(members[0].device):
            return tf.concat(members, 0)
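# A minimal usage sketch (hypothetical values; pack_range and GradPackTuple
# as defined above): pack the gradients of two small variables, indices
# 0..1, into one flat tensor.
v0 = tf.Variable(tf.zeros([3]), name='v0')
v1 = tf.Variable(tf.zeros([2, 2]), name='v1')
grad_vars = [(tf.ones([3]), v0), (tf.ones([2, 2]), v1)]
packing = {}
packed = pack_range('tower0_range0', packing, grad_vars, (0, 1))
# `packed` is a rank-1 tensor with 3 + 4 = 7 elements;
# packing['tower0_range0'] records the variables and shapes needed to
# restore the original (grad, var) structure later.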
def testClearDevices(self):
    graph1 = tf.Graph()
    with graph1.as_default():
        with tf.device("/device:CPU:0"):
            a = tf.Variable(tf.constant(1.0, shape=[2, 2]), name="a")
        with tf.device("/job:ps/replica:0/task:0/gpu:0"):
            b = tf.Variable(tf.constant(2.0, shape=[2, 2]), name="b")
        with tf.device("/job:localhost/replica:0/task:0/cpu:0"):
            tf.matmul(a, b, name="matmul")

    self.assertEqual("/device:CPU:0",
                     str(graph1.as_graph_element("a").device))
    self.assertEqual("/job:ps/replica:0/task:0/device:GPU:0",
                     str(graph1.as_graph_element("b").device))
    self.assertEqual("/job:localhost/replica:0/task:0/device:CPU:0",
                     str(graph1.as_graph_element("matmul").device))

    orig_meta_graph, _ = meta_graph.export_scoped_meta_graph(graph=graph1)

    graph2 = tf.Graph()
    with graph2.as_default():
        meta_graph.import_scoped_meta_graph(orig_meta_graph,
                                            clear_devices=True)

    self.assertEqual("", str(graph2.as_graph_element("a").device))
    self.assertEqual("", str(graph2.as_graph_element("b").device))
    self.assertEqual("", str(graph2.as_graph_element("matmul").device))
def _build_word_embeddings(self):
    n_tokens_vocab = self.options['n_tokens_vocab']
    batch_size = self.options['batch_size']
    unroll_steps = self.options['unroll_steps']

    # LSTM options
    projection_dim = self.options['lstm']['projection_dim']

    # the input token_ids and word embeddings
    self.token_ids = tf.placeholder(DTYPE_INT,
                                    shape=(batch_size, unroll_steps),
                                    name='token_ids')
    # the word embeddings
    with tf.device("/cpu:0"):
        self.embedding_weights = tf.get_variable(
            "embedding", [n_tokens_vocab, projection_dim],
            dtype=DTYPE,
        )
        self.embedding = tf.nn.embedding_lookup(self.embedding_weights,
                                                self.token_ids)

    # if a bidirectional LM then make placeholders for reverse
    # model and embeddings
    if self.bidirectional:
        self.token_ids_reverse = tf.placeholder(
            DTYPE_INT,
            shape=(batch_size, unroll_steps),
            name='token_ids_reverse')
        with tf.device("/cpu:0"):
            self.embedding_reverse = tf.nn.embedding_lookup(
                self.embedding_weights, self.token_ids_reverse)
def all_avg_gradients(tower_gradvars, devices, param_server_device='/gpu:0',
                      usenccl=True):
    if len(devices) == 1:
        return tower_gradvars

    num_devices = len(devices)
    avg_gradvars = []
    for layer in zip(*tower_gradvars):
        grads_on_devices, vars_on_devices = zip(*layer)
        if have_nccl and usenccl:
            # Note: These nccl ops _must_ be run on all devices, else
            # deadlock
            # print('ALL_AVG_GRADIENTS GRADS_ON_DEVICES:',
            #       grads_on_devices)  # DEBUG
            avg_grads_on_devices = nccl.all_sum(grads_on_devices)
            for d, device in enumerate(devices):
                with tf.device(device):
                    avg_grads_on_devices[d] *= 1. / num_devices
        else:
            with tf.device(param_server_device):
                avg_grad = tf.reduce_mean(tf.stack(grads_on_devices), 0)
            avg_grads_on_devices = [avg_grad] * num_devices

        avg_gradvars_on_devices = zip(*(avg_grads_on_devices,
                                        vars_on_devices))
        avg_gradvars.append(avg_gradvars_on_devices)

    return list(zip(*avg_gradvars))
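# A minimal usage sketch (assumed setup, not from the original: one variable
# per tower on two GPUs, averaged without NCCL on '/gpu:0').
devices = ['/gpu:0', '/gpu:1']
tower_gradvars = []
for dev in devices:
    with tf.device(dev):
        v = tf.Variable(1.0)
        grad = tf.gradients(3.0 * v, [v])[0]
        tower_gradvars.append([(grad, v)])
avg = all_avg_gradients(tower_gradvars, devices, usenccl=False)
# avg[d] now holds, for device d, that tower's (averaged grad, var) pairs;
# every device sees the same averaged gradient.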
def get_updates(self, loss, params):
    tower_gradvars = []
    gdev_list = self._gdev_list
    global_scope = tf.get_variable_scope()
    for idev, device in enumerate(gdev_list):
        with tf.device(device), \
                tf.variable_scope(global_scope, reuse=idev > 0), \
                tf.name_scope('tower_%i' % idev):
            # compute_gradients already returns (grad, var) pairs.
            gradvars = self.optimizer.compute_gradients(loss, params)
            tower_gradvars.append(gradvars)

    tower_gradvars = all_avg_gradients(tower_gradvars, gdev_list,
                                       usenccl=False)

    self.updates = [K.update_add(self.iterations, 1)]
    for device_num, device in enumerate(gdev_list):
        with tf.device(device):
            gradvars = tower_gradvars[device_num]
            opt_update = self.optimizer.apply_gradients(
                gradvars, global_step=self.iterations)
            self.updates.append(opt_update)

    return self.updates
def test_single_output(self):
    @operator()
    def add(x, y):
        pos = position_in(x.shape)
        out = output_like(x)
        out[pos] = x[pos] + y[pos]
        return out

    in0 = np.random.random(5).astype(np.float32)
    in1 = np.random.random(5).astype(np.float32)
    reference = 4 * (in0 + in1) * (in0 + in1)

    test_config = tf.ConfigProto(allow_soft_placement=False)
    # Don't perform optimizations for tests so we don't inadvertently run
    # gpu ops on cpu
    test_config.graph_options.optimizer_options.opt_level = -1
    with tf.Session(config=test_config) as sess:
        with tf.device('/cpu:0'):
            a = in0 * 2
            b = in1 * 2
            c = as_tensorflow(add(a, b))
            squared = tf.square(c)
        if cuda_enabled:
            with tf.device('/gpu:0'):
                a_gpu = in0 * 2
                b_gpu = in1 * 2
                c_gpu = as_tensorflow(add(a_gpu, b_gpu))
                squared_gpu = tf.square(c_gpu)
            result, result_gpu = sess.run([squared, squared_gpu])
            assert np.allclose(reference, result_gpu)
        else:
            result = sess.run([squared])
        assert np.allclose(reference, result)
def computeLoss(predicted, labels, weights, withAverage=False):
    labels = tf.cast(labels, tf.int64)
    # rescale logits by weight of the class
    weighted_logits = tf.mul(predicted, weights)
    # perform softmax on weighted logits and compute cross entropy
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        weighted_logits, labels, name='cross_entropy_per_example')
    # mean cross entropy for the mini batch
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    with tf.device("/cpu:0"):
        tf.scalar_summary('cross_entropy', cross_entropy_mean)
    # add the cross entropy loss to losses
    tf.add_to_collection('losses', cross_entropy_mean)
    # total loss as sum of all the losses
    losses = tf.get_collection('losses')
    loss = tf.add_n(losses, name='total_loss')
    with tf.device("/cpu:0"):
        tf.scalar_summary('loss', loss)
    if withAverage:
        # get exponential moving average loss
        loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
        loss_averages_op = loss_averages.apply(losses + [loss])
        tf.scalar_summary('cross_entropy_running_average',
                          loss_averages.average(loss))
        return loss_averages_op
    else:
        return loss
def main(unused_argv):
    tf.logging.set_verbosity(FLAGS.log)

    if not tf.gfile.Exists(FLAGS.logdir):
        tf.gfile.MakeDirs(FLAGS.logdir)

    with tf.Graph().as_default():
        # If ps_tasks is 0, the local device is used. When using multiple
        # (non-local) replicas, the ReplicaDeviceSetter distributes the
        # variables across the different devices.
        model = utils.get_module("baseline.models.%s" % FLAGS.model)
        hparams = model.get_hparams(FLAGS.config)

        # Run the Reader on the CPU
        cpu_device = ("/job:worker/cpu:0" if FLAGS.ps_tasks else
                      "/job:localhost/replica:0/task:0/cpu:0")

        with tf.device(cpu_device):
            with tf.name_scope("Reader"):
                batch = reader.NSynthDataset(
                    FLAGS.train_path,
                    is_training=True).get_baseline_batch(hparams)

        with tf.device(tf.train.replica_device_setter(
                ps_tasks=FLAGS.ps_tasks)):
            train_op = model.train_op(batch, hparams, FLAGS.config)

        # Run training
        slim.learning.train(
            train_op=train_op,
            logdir=FLAGS.logdir,
            master=FLAGS.master,
            is_chief=FLAGS.task == 0,
            number_of_steps=hparams.max_steps,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs)
def __call__(self, getter, name, *args, **kwargs):
    staging_ops = self.variable_mgr.staging_vars_on_devices[self.device_num]
    if name in staging_ops:
        put_op, get_op = staging_ops[name]
        return get_op
    real_var = getter(name, *args, **kwargs)
    shape = kwargs['shape']
    dtype = kwargs['dtype']
    trainable = kwargs['trainable']
    if self.cpu_device:
        with tf.device(self.cpu_device):
            # This helps copying the weights from the parameter to this
            # server only once.
            if name in self.variable_mgr.staged_vars_on_cpu:
                cpu_var = self.variable_mgr.staged_vars_on_cpu[name]
            else:
                cpu_var = tf.identity(real_var)
                self.variable_mgr.staged_vars_on_cpu[name] = cpu_var
            var_to_stage = cpu_var
    else:
        var_to_stage = tf.identity(real_var)  # de-reference the variable.

    with tf.device(self.devices[self.device_num]):
        staging_area = data_flow_ops.StagingArea([dtype], shapes=[shape])
        put_op = staging_area.put([var_to_stage])
        get_op = staging_area.get()[0]
        staging_ops[name] = (put_op, get_op)
    if trainable:
        # For trainable variables, they are managed separately through
        # apply_gradients.
        return get_op
    else:
        # For other shadow variables, the access is decoupled through a
        # wrapper class.
        return StagedModelVariable(real_var, get_op, self.variable_mgr)
def create_weight_variables(shape, seed, name, use_gpu=False):
    """
    Create gaussian random weights with mean 0 and a Xavier-scaled std

    **Parameters**

      shape: Shape of the layer
      seed: Random seed for the initializer
      name: Variable name
      use_gpu: If True, place the variable on GPU; otherwise on CPU
    """
    #import ipdb; ipdb.set_trace()
    if len(shape) == 4:
        in_out = shape[0] * shape[1] * shape[2] + shape[3]
    else:
        in_out = shape[0] + shape[1]

    import math
    stddev = math.sqrt(3.0 / in_out)  # XAVIER INITIALIZER (GAUSSIAN)

    initializer = tf.truncated_normal(shape, stddev=stddev, seed=seed)

    if use_gpu:
        with tf.device("/gpu"):
            return tf.get_variable(name, initializer=initializer,
                                   dtype=tf.float32)
    else:
        with tf.device("/cpu"):
            return tf.get_variable(name, initializer=initializer,
                                   dtype=tf.float32)
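# Worked example (assumed shape, not from the original): a conv layer with
# shape [5, 5, 3, 64] gives in_out = 5 * 5 * 3 + 64 = 139, so
# stddev = sqrt(3 / 139) ~= 0.147.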
def all_sync_params(tower_params, devices, usenccl=True):
    """Assigns the params from the first tower to all others"""
    if len(devices) == 1:
        return tf.no_op()
    sync_ops = []
    if have_nccl and usenccl:
        for param_on_devices in zip(*tower_params):
            # print('PARAM_ON_DEVICES: {}'.format(param_on_devices))  # DEBUG
            # Note: param_on_devices is [paramX_gpu0, paramX_gpu1, ...]
            param0 = param_on_devices[0]
            send_op, received_tensors = nccl.broadcast(param0, devices[1:])
            sync_ops.append(send_op)
            for device, param, received in zip(devices[1:],
                                               param_on_devices[1:],
                                               received_tensors):
                with tf.device(device):
                    sync_op = param.assign(received)
                    sync_ops.append(sync_op)
    else:
        params0 = tower_params[0]
        for device, params in zip(devices, tower_params):
            with tf.device(device):
                for param, param0 in zip(params, params0):
                    sync_op = param.assign(param0.read_value())
                    sync_ops.append(sync_op)
    return tf.group(*sync_ops)
def all_reduce_gradients(tower_grads, devices):
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Note that each grad_and_vars looks like the following:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        grads = []
        split_grads = []
        assert len(grad_and_vars) == FLAGS.num_workers
        # Each GPU splits its own grad
        for i, (g, _) in enumerate(grad_and_vars):
            with tf.device(devices[i]):
                split_grads.append(tf.split(0, FLAGS.num_workers, g))
        # Each GPU gathers slices of grad from other GPUs to do average.
        for i, dev in enumerate(devices):
            with tf.device(dev):
                x = split_grads[i][i]
                for j in range(FLAGS.num_workers):
                    if i == j:
                        continue
                    x += split_grads[j][i]
                grads.append(x / FLAGS.num_workers)
        grad = tf.concat(0, grads)

        # Keep in mind that the Variables are redundant because they are
        # shared across towers. So .. we will just return the first tower's
        # pointer to the Variable.
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads
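# A minimal usage sketch (assumptions: FLAGS.num_workers == 2 and the old
# TF 0.x tf.split/tf.concat argument order used above; the normally shared
# variables are simplified to one fresh variable per tower).
devices = ['/gpu:0', '/gpu:1']
tower_grads = []
for dev in devices:
    with tf.device(dev):
        v = tf.Variable(tf.ones([4]))
        grad = tf.gradients(tf.reduce_sum(2.0 * v), [v])[0]
        tower_grads.append([(grad, v)])
avg = all_reduce_gradients(tower_grads, devices)
# avg[0] is (gradient averaged across both towers, first tower's variable).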
def train():
    hyperparams = {'batch_size': 50,
                   'learning_rate': 0.0001,
                   'grad_decay': 0.95,
                   'grad_epsilon': 0.01,
                   'num_updates': 20000,
                   'grad_norm_clip': 5}

    with tf.device('/cpu:0'):
        model = TradingSystemsModel(hyperparams)
        loss = tb.Crossentropy(hyperparams)
        acc = tb.CatAcc(hyperparams)
        evaluator = tb.Evaluator(hyperparams, loss, acc)
        optim = tb.RMSPropOptim(hyperparams)
        trainer = tb.Trainer(model, hyperparams, loss, optim, evaluator)

    split = 90000
    data = np.load('data/trading-systems.npz')
    print(data['ticks'].shape)
    train_xs = {'ticks': data['ticks'][:split]}
    train_y = data['targets'][:split]
    val_xs = {'ticks': data['ticks'][split:]}
    val_y = data['targets'][split:]

    with tf.device('/cpu:0'):
        trainer.train(train_xs, train_y,
                      val_xs, val_y,
                      val_cmp=True)
        evaluator.eval(model, val_xs, val_y)
def prepare_networks(gpu, image_batch, nb_cl, nb_groups):
    mean_img = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32,
                           shape=[1, 1, 1, 3], name='img_mean')
    scores = []
    with tf.variable_scope('ResNet18'):
        with tf.device('/gpu:' + gpu):
            score = utils_resnet.ResNet18(image_batch - mean_img,
                                          phase='train',
                                          num_outputs=nb_cl * nb_groups)
            scores.append(score)
        scope = tf.get_variable_scope()
        scope.reuse_variables()

    # First score and initialization
    variables_graph = tf.get_collection(tf.GraphKeys.WEIGHTS,
                                        scope='ResNet18')
    scores_stored = []
    with tf.variable_scope('store_ResNet18'):
        with tf.device('/gpu:' + gpu):
            score = utils_resnet.ResNet18(image_batch - mean_img,
                                          phase='test',
                                          num_outputs=nb_cl * nb_groups)
            scores_stored.append(score)
        scope = tf.get_variable_scope()
        scope.reuse_variables()

    variables_graph2 = tf.get_collection(tf.GraphKeys.WEIGHTS,
                                         scope='store_ResNet18')

    return variables_graph, variables_graph2, scores, scores_stored
def _add_shared_train_op(self):
    """Sets self._train_op, the op to run for training."""
    # Take gradients of the trainable variables w.r.t. the loss function to
    # minimize
    if self._hps.rl_training or self._hps.ac_training:
        loss_to_minimize = self._reinforce_shared_loss
        if self._hps.coverage:
            loss_to_minimize = self._reinforce_cov_total_loss
    else:
        loss_to_minimize = self._pgen_loss
        if self._hps.coverage:
            loss_to_minimize = self._pointer_cov_total_loss

    tvars = tf.trainable_variables()
    gradients = tf.gradients(
        loss_to_minimize, tvars,
        aggregation_method=tf.AggregationMethod.EXPERIMENTAL_TREE)

    # Clip the gradients
    with tf.device("/gpu:{}".format(self._hps.gpu_num)):
        grads, global_norm = tf.clip_by_global_norm(gradients,
                                                    self._hps.max_grad_norm)

    # Add a summary
    tf.summary.scalar('global_norm', global_norm)

    # Apply adagrad optimizer
    optimizer = tf.train.AdagradOptimizer(
        self._hps.lr, initial_accumulator_value=self._hps.adagrad_init_acc)
    with tf.device("/gpu:{}".format(self._hps.gpu_num)):
        self._shared_train_op = optimizer.apply_gradients(
            zip(grads, tvars), global_step=self.global_step,
            name='train_step')
def build_generator(self):
    # placeholders are for feeding data
    image = tf.placeholder(tf.float32,
                           [self.batch_size, self.dim_image])  # (batch_size, dim_image)
    local_image = tf.placeholder(tf.float32,
                                 [self.batch_size, self.dim_image])
    query = tf.placeholder(tf.int32, [self.batch_size, MAX_QUERY_WORDS])
    query_mask = tf.placeholder(tf.float32,
                                [self.batch_size, MAX_QUERY_WORDS])
    bbox = tf.placeholder(tf.float32,
                          [self.batch_size, self.dim_coordinates])

    # [image] embed image feature to dim_hidden
    image_emb = tf.nn.bias_add(tf.matmul(image, self.embed_image_W),
                               self.embed_image_b)  # (batch_size, dim_hidden)
    local_image_emb = tf.nn.bias_add(
        tf.matmul(local_image, self.embed_local_W),
        self.embed_local_b)  # (batch_size, dim_hidden)

    score = tf.zeros([self.batch_size], tf.float32)

    state_lang = tf.zeros([self.batch_size, self.lstm_lang.state_size])
    state_context = tf.zeros([self.batch_size,
                              self.lstm_context.state_size])
    state_local = tf.zeros([self.batch_size, self.lstm_local.state_size])
    query_emb = tf.zeros([self.batch_size, self.dim_hidden])
    for j in range(MAX_QUERY_WORDS):
        # language lstm
        with tf.variable_scope("lstm_lang"):
            output_lang, state_lang = self.lstm_lang(query_emb, state_lang)
        lang = tf.slice(state_lang, [0, 0],
                        [self.batch_size, self.dim_hidden])

        # context lstm
        with tf.variable_scope("lstm_context"):
            output_context, state_context = self.lstm_context(
                tf.concat(1, [image_emb, lang]), state_context)
        context = tf.slice(state_context, [0, 0],
                           [self.batch_size, self.dim_hidden])

        # local lstm
        with tf.variable_scope("lstm_local"):
            output_local, state_local = self.lstm_local(
                tf.concat(1, [local_image_emb, lang, bbox]), state_local)
        local = tf.slice(state_local, [0, 0],
                         [self.batch_size, self.dim_hidden])

        context_emb = tf.nn.xw_plus_b(context, self.W_context,
                                      self.B_context)
        local_emb = tf.nn.xw_plus_b(local, self.W_local, self.B_local)
        word_pred = tf.add(context_emb, local_emb)

        max_prob_index = tf.argmax(word_pred, 1)  # b

        labels = tf.expand_dims(query[:, j], 1)
        indices = tf.expand_dims(tf.range(0, self.batch_size, 1), 1)
        concated = tf.concat(1, [indices, labels])
        with tf.device('/cpu:0'):
            onehot_labels = tf.sparse_to_dense(
                concated, tf.pack([self.batch_size, self.dict_words]),
                1.0, 0.0)
        current_score = tf.mul(onehot_labels, word_pred)
        current_score = tf.reduce_sum(current_score, 1)
        current_score = tf.mul(current_score, query_mask[:, j])
        current_score = tf.reshape(current_score, [1, self.batch_size])
        current_score = tf.nn.softmax(current_score)
        score = tf.add(score, current_score)

        with tf.device("/cpu:0"):
            tf.get_variable_scope().reuse_variables()
            query_emb = tf.nn.embedding_lookup(self.query_emb_W,
                                               max_prob_index)

    return score, image, local_image, query, query_mask, bbox
def __init__(
        self, sequence_length, num_classes, vocab_size,
        embedding_size, filter_sizes, num_filters, l2_reg_lambda=0.0):

    # Placeholders for input, output and dropout
    self.input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                  name="input_x")
    self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                  name="input_y")
    self.dropout_keep_prob = tf.placeholder(tf.float32,
                                            name="dropout_keep_prob")

    # Keeping track of l2 regularization loss (optional)
    l2_loss = tf.constant(0.0)

    # Embedding layer
    """
    <Variable>
      - W: randomly assigns the components of each word's embedding vector
    """
    #with tf.device('/gpu:0'), tf.name_scope("embedding"):
    with tf.device('/cpu:0'), tf.name_scope("embedding"):
        W = tf.Variable(
            tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
            name="W")
        self.embedded_chars = tf.nn.embedding_lookup(W, self.input_x)
        self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars,
                                                      -1)

    # Create a convolution + maxpool layer for each filter size
    pooled_outputs = []
    for i, filter_size in enumerate(filter_sizes):
        with tf.name_scope("conv-maxpool-%s" % filter_size):
            # Convolution Layer
            filter_shape = [filter_size, embedding_size, 1, num_filters]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1),
                            name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            conv = tf.nn.conv2d(
                self.embedded_chars_expanded,
                W,
                strides=[1, 1, 1, 1],
                padding="VALID",
                name="conv")
            # Apply nonlinearity
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            # Maxpooling over the outputs
            pooled = tf.nn.max_pool(
                h,
                ksize=[1, sequence_length - filter_size + 1, 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="pool")
            pooled_outputs.append(pooled)

    # Combine all the pooled features
    num_filters_total = num_filters * len(filter_sizes)
    self.h_pool = tf.concat(3, pooled_outputs)
    self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

    # Add dropout
    with tf.name_scope("dropout"):
        self.h_drop = tf.nn.dropout(self.h_pool_flat,
                                    self.dropout_keep_prob)

    # Final (unnormalized) scores and predictions
    with tf.name_scope("output"):
        W = tf.get_variable(
            "W",
            shape=[num_filters_total, num_classes],
            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
        l2_loss += tf.nn.l2_loss(W)
        l2_loss += tf.nn.l2_loss(b)
        # xw_plus_b = matmul(x, W) + b
        self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
        self.predictions = tf.argmax(self.scores, 1, name="predictions")

    # Calculate Mean cross-entropy loss
    with tf.name_scope("loss"):
        losses = tf.nn.softmax_cross_entropy_with_logits(
            logits=self.scores, labels=self.input_y)
        self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

    # Accuracy
    with tf.name_scope("accuracy"):
        correct_predictions = tf.equal(self.predictions,
                                       tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(
            tf.cast(correct_predictions, "float"), name="accuracy")
def __init__(self, is_trainning, embeddings, config):
    self.is_trainning = is_trainning
    self.hidden_num = hidden_num = config.hidden_num
    self.seq_length = seq_length = config.max_length
    self.class_num = class_num = config.class_num

    # init placeholders
    self.text_a = tf.placeholder(tf.int32, [None, seq_length],
                                 name='text_a')
    self.text_b = tf.placeholder(tf.int32, [None, seq_length],
                                 name='text_b')
    self.y = tf.placeholder(tf.int32, [None, class_num], name='y')
    # real lengths
    self.a_length = tf.placeholder(tf.int32, [None], name='a_length')
    self.b_length = tf.placeholder(tf.int32, [None], name='b_length')
    self.dropout_keep_prob = tf.placeholder(tf.float32,
                                            name="dropout_keep_prob")

    # embedding layers
    with tf.device('/cpu:0'), tf.name_scope("embedding"):
        self.vocab_matrix = tf.Variable(embeddings, trainable=False)
        self.text_a_embed = tf.nn.embedding_lookup(self.vocab_matrix,
                                                   self.text_a)
        self.text_b_embed = tf.nn.embedding_lookup(self.vocab_matrix,
                                                   self.text_b)

    # Input Encoding
    with tf.name_scope('Input_Encoding'):
        a_bar = self.biLSTMBlock(self.text_a_embed, hidden_num,
                                 'Input_Encoding/biLSTM', self.a_length)
        b_bar = self.biLSTMBlock(self.text_b_embed, hidden_num,
                                 'Input_Encoding/biLSTM', self.b_length,
                                 isreuse=True)

    # Local Inference Modeling
    with tf.name_scope('Local_inference_Modeling'):
        # compute the similarity between every word of a_bar and b_bar
        with tf.name_scope('word_similarity'):
            attention_weights = tf.matmul(a_bar,
                                          tf.transpose(b_bar, [0, 2, 1]))
            attentionsoft_a = tf.nn.softmax(attention_weights)
            attentionsoft_b = tf.nn.softmax(
                tf.transpose(attention_weights, [0, 2, 1]))
            a_hat = tf.matmul(attentionsoft_a, b_bar)
            b_hat = tf.matmul(attentionsoft_b, a_bar)

        # compute m_a, m_b
        with tf.name_scope("compute_m_a/m_b"):
            a_diff = tf.subtract(a_bar, a_hat)
            a_mul = tf.multiply(a_bar, a_hat)
            b_diff = tf.subtract(b_bar, b_hat)
            b_mul = tf.multiply(b_bar, b_hat)
            # m_a = [a_bar, a_hat, a_bar - a_hat, a_bar 'dot' a_hat] (14)
            # m_b = [b_bar, b_hat, b_bar - b_hat, b_bar 'dot' b_hat] (15)
            self.m_a = tf.concat([a_bar, a_hat, a_diff, a_mul], axis=2)
            self.m_b = tf.concat([b_bar, b_hat, b_diff, b_mul], axis=2)

    with tf.name_scope("Inference_Composition"):
        v_a = self.biLSTMBlock(self.m_a, hidden_num,
                               'Inference_Composition/biLSTM',
                               self.a_length)
        v_b = self.biLSTMBlock(self.m_b, hidden_num,
                               'Inference_Composition/biLSTM',
                               self.b_length, isreuse=True)

        # average pool and max pool
        v_a_avg = tf.reduce_mean(v_a, axis=1)
        v_b_avg = tf.reduce_mean(v_b, axis=1)
        v_a_max = tf.reduce_max(v_a, axis=1)
        v_b_max = tf.reduce_max(v_b, axis=1)
        v = tf.concat([v_a_avg, v_a_max, v_b_avg, v_b_max], axis=1)

    with tf.name_scope("output"):
        initializer = tf.random_normal_initializer(0.0, 0.1)
        with tf.variable_scope('feed_foward_layer1'):
            inputs = tf.nn.dropout(v, self.dropout_keep_prob)
            outputs = tf.layers.dense(inputs, hidden_num, tf.nn.relu,
                                      kernel_initializer=initializer)
        with tf.variable_scope('feed_foward_layer2'):
            outputs = tf.nn.dropout(outputs, self.dropout_keep_prob)
            self.logits = tf.layers.dense(outputs, class_num, tf.nn.tanh,
                                          kernel_initializer=initializer)
        # x = tf.Variable(tf.constant(2.0, shape=[32, 1]), dtype=tf.float32)
        # x = tf.constant(2.0, shape=[32, 1], dtype=tf.float32)
        # logits0, logits1 = tf.split(self.logits, [1, 1], 1)
        # self.logits_new = tf.concat([logits0, tf.multiply(logits1, x)], axis=1)
        self.score = tf.nn.softmax(self.logits, name='score')
        self.prediction = tf.argmax(self.score, 1, name="prediction")

    with tf.name_scope('cost'):
        self.cost = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=self.y, logits=self.logits)
        self.cost = tf.reduce_mean(self.cost)
        weights = [v for v in tf.trainable_variables()
                   if ('w' in v.name) or ('kernel' in v.name)]
        l2_loss = tf.add_n([tf.nn.l2_loss(w) for w in weights]) * config.l2_lambda
        self.loss = l2_loss + self.cost

    self.accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(self.y, axis=1), self.prediction),
                tf.float32))

    if not is_trainning:
        return

    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars), 5)
    optimizer = tf.train.AdamOptimizer(config.learning_rate)
    self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def __init__(self, num_users, num_items, num_ratings, embedding_dim,
             reg_lambda):
    assert num_users >= 1
    self.num_users = num_users
    assert num_items >= 1
    self.num_items = num_items
    assert num_ratings >= 1
    self.num_ratings = num_ratings
    assert embedding_dim >= 1
    self.embedding_dim = embedding_dim
    assert reg_lambda >= 0

    # Placeholders for input, output and dropout
    self.input_user_ids = tf.placeholder(tf.int32, [None],
                                         name="input_user_ids")
    self.input_per_user_count = tf.placeholder(tf.int32, [None],
                                               name="input_per_user_count")
    self.input_per_user_item_ids = tf.placeholder(
        tf.int32, [None, None], name="input_per_user_item_ids")
    self.input_per_user_ratings = tf.placeholder(
        tf.float32, [None, None], name="input_per_user_ratings")
    self.input_per_user_neg_ids = tf.placeholder(
        tf.int32, [None, None], name="input_per_user_neg_ids")

    num_users = tf.shape(self.input_user_ids)[0]
    batch_size = tf.reduce_sum(self.input_per_user_count)
    asrt1 = tf.assert_equal(num_users,
                            tf.shape(self.input_per_user_count)[0])
    asrt2 = tf.assert_equal(num_users,
                            tf.shape(self.input_per_user_item_ids)[0])
    asrt3 = tf.assert_equal(num_users,
                            tf.shape(self.input_per_user_ratings)[0])
    asrt4 = tf.assert_equal(num_users,
                            tf.shape(self.input_per_user_neg_ids)[0])

    # pu = per_user
    pu_mask = tf.sequence_mask(self.input_per_user_count, dtype=tf.float32)

    # embedding lookup layer
    with tf.device('/cpu:0'), tf.name_scope(
            'embedding_lookup'), tf.control_dependencies(
                [asrt1, asrt2, asrt3, asrt4]):
        # get dimension of user_ids to match the per_user_* stuff
        expanded_user_ids = tf.expand_dims(self.input_user_ids, 1)
        expanded_user_em = embedding_lookup_layer(expanded_user_ids,
                                                  self.num_users,
                                                  self.embedding_dim,
                                                  'user_embedding')
        pu_item_em = embedding_lookup_layer(self.input_per_user_item_ids,
                                            self.num_items,
                                            self.embedding_dim,
                                            'item_embedding')
        pu_neg_em = embedding_lookup_layer(self.input_per_user_neg_ids,
                                           self.num_items,
                                           self.embedding_dim,
                                           'item_embedding',
                                           reuse=True)
        pu_item_bias = bias_lookup_layer(self.input_per_user_item_ids,
                                         self.num_items, 'item_embedding')
        pu_neg_bias = bias_lookup_layer(self.input_per_user_neg_ids,
                                        self.num_items, 'item_embedding',
                                        reuse=True)

    with tf.name_scope('bpr'):
        pu_em_delta = pu_item_em - pu_neg_em
        pu_bias_delta = pu_item_bias - pu_neg_bias
        pu_prediction_delta = tf.reduce_sum(
            expanded_user_em * pu_em_delta, axis=-1) + pu_bias_delta
        self.bpr_loss = pu_mask * tf.log(
            tf.sigmoid(-pu_prediction_delta) + 0.01)  # TODO: log?

    # regularization
    with tf.name_scope('regularization'):
        self.reg_loss = (reg_lambda) / 2 * sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

    # loss
    with tf.name_scope('loss'):
        self.loss = tf.reduce_mean(self.bpr_loss) + self.reg_loss
def train(point_cloud_data):
    with tf.Graph().as_default():
        with tf.device('/gpu:' + str(0)):
            # get the placeholders
            point_clouds_ph, rot_ph = MODEL.placeholder_inputs(32, 1024)
            # is-training placeholder
            is_training_ph = tf.placeholder(tf.bool, shape=())
            print(is_training_ph)

            batch = tf.Variable(0)
            bn_decay = get_bn_decay(batch)
            tf.summary.scalar('bn decay', bn_decay)
            print(bn_decay)

            # get model and loss
            pred = MODEL.get_model(point_clouds_ph, is_training_ph, bn_decay)
            loss, mat_diff_sum = MODEL.get_loss(pred, rot_ph)
            tf.summary.scalar("loss", loss)
            print(pred)
            print(loss)

            #correct = tf.equal(tf.argmax(pred, 1), tf.to_int64(rot_ph))
            #accuracy = tf.reduce_sum(tf.cast(correct, tf.float32) / float(BATCH_SIZE))
            #tf.summary.scalar("accuracy", accuracy)

            learning_rate = get_learning_rate(batch)
            tf.summary.scalar('learning rate', learning_rate)
            optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                   momentum=MOMENTUM)
            train_op = optimizer.minimize(loss, global_step=batch)

            saver = tf.train.Saver()

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        sess = tf.Session(config=config)

        # add summary writers
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(
            os.path.join(LOG_DIR, "train_test"), sess.graph)

        # init variables
        init = tf.global_variables_initializer()
        sess.run(init, {is_training_ph: True})

        ops = {'pointclouds_pl': point_clouds_ph,
               'rot_ph': rot_ph,
               'is_training_pl': is_training_ph,
               'pred': pred,
               'loss': loss,
               'mat_diff_sum': mat_diff_sum,
               'train_op': train_op,
               'merged': merged,
               'step': batch,
               'point_cloud_data': point_cloud_data}

        for epoch in range(MAX_EPOCH):
            log_string('-------------- EPOCH %03d ---------------------'
                       % (epoch))
            sys.stdout.flush()
            train_one_epoch(sess, ops, train_writer)
            save_path = saver.save(sess,
                                   os.path.join(LOG_DIR, "reg_model"),
                                   global_step=epoch)
            log_string("Model saved in file: %s" % save_path)
def train_model(config, environ, train_data, test_data, trainval_data=None):
    """Trains a CIFAR model.

    Args:
        config: Config object
        environ: Environ object
        train_data: Dataset object
        test_data: Dataset object

    Returns:
        acc: Final test accuracy
    """
    np.random.seed(0)
    if not hasattr(config, "seed"):
        tf.set_random_seed(1234)
        log.info("Setting tensorflow random seed={:d}".format(1234))
    else:
        log.info("Setting tensorflow random seed={:d}".format(config.seed))
        tf.set_random_seed(config.seed)
    if environ.verbose:
        verbose_level = 0
    else:
        verbose_level = 2

    if trainval_data is None:
        trainval_data = train_data

    log.info("Environment: {}".format(environ.__dict__))
    log.info("Config: {}".format(config.__dict__))

    save_folder = os.path.join(environ.save_folder, environ.exp_id)
    logs_folder = os.path.join(environ.logs_folder, environ.exp_id)
    with log.verbose_level(verbose_level):
        exp_logger = ExperimentLogger(logs_folder)

        if not hasattr(config, "seed"):
            data_seed = 0
        else:
            data_seed = config.seed

        # Gets data iterators.
        train_iter = get_iter(
            train_data,
            batch_size=config.batch_size,
            shuffle=True,
            cycle=True,
            prefetch=config.prefetch,
            seed=data_seed,
            num_worker=25,
            queue_size=500)
        trainval_iter = get_iter(
            train_data,
            batch_size=config.batch_size,
            shuffle=True,
            cycle=True,
            prefetch=config.prefetch,
            num_worker=10,
            queue_size=200)
        test_iter = get_iter(
            test_data,
            batch_size=config.batch_size,
            shuffle=False,
            cycle=False,
            prefetch=config.prefetch,
            num_worker=10,
            queue_size=200)

        # Builds models.
        log.info("Building models")
        with tf.name_scope("Train"):
            with tf.variable_scope("Model", reuse=None):
                with tf.device(environ.device):
                    m = CNNModelSR(config, is_training=True)

        with tf.name_scope("Valid"):
            # also include testing in this graph
            with tf.variable_scope("Model", reuse=True):
                with tf.device(environ.device):
                    mvalid = CNNModelSR(config, is_training=False)

        # Initializes variables.
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            saver = tf.train.Saver()
            sess.run(tf.global_variables_initializer())

            def train_step():
                """Train step."""
                batch = train_iter.next()
                feed_data = {
                    m.input: np.expand_dims(batch["img"], axis=3),
                    m.label: np.expand_dims(
                        ut.crop_img(batch["label"], config.crop_border),
                        axis=3)
                }
                cost, l2_loss, _ = sess.run(
                    [m.cost, m.l2_loss, m.train_op], feed_dict=feed_data)
                return l2_loss

            def evaluate(data_iter, nbatches):
                """Runs evaluation."""
                count = 0
                PSNR = 0.0
                SSIM = 0.0
                crop_border = config.crop_border
                if nbatches == -1:
                    iter_ = data_iter
                else:
                    iter_ = range(nbatches)

                for bb in iter_:
                    if nbatches == -1:
                        batch = bb
                    else:
                        batch = data_iter.next()

                    # deal with gray images
                    is_rgb_img = False if len(batch["img"].shape) < 3 else True
                    if not is_rgb_img:
                        img_y = batch["img"]
                        label_y = batch["label"]
                    else:
                        # note Matlab format is Ycbcr
                        img_y = batch["img"][:, :, 0]
                        img_cb = batch["img"][:, :, 1]
                        img_cr = batch["img"][:, :, 2]
                        label_y = batch["label"][:, :, 0]

                    label_y = ut.crop_img(label_y, crop_border)
                    feed_data = {
                        mvalid.input: np.expand_dims(
                            np.expand_dims(img_y, axis=0), axis=3)
                    }
                    output_img = sess.run(mvalid.output, feed_dict=feed_data)
                    # clip pixel value
                    output_img = ut.clip_img(np.squeeze(output_img * 255.0))
                    PSNR += ut.compute_psnr(output_img, label_y)
                    SSIM += ut.compute_ssim(output_img, label_y)

                    if not is_rgb_img:
                        save_input_img = ut.crop_img(
                            ut.post_process(img_y * 255.0), crop_border)
                        save_output_img = ut.post_process(output_img)
                    else:
                        save_input_img = np.zeros_like(batch["img"])
                        # note OpenCV format is Ycrcb
                        save_input_img[:, :, 0] = ut.clip_img(img_y * 255.0)
                        save_input_img[:, :, 1] = img_cr
                        save_input_img[:, :, 2] = img_cb
                        save_input_img = ut.crop_img(
                            ut.post_process(
                                cv2.cvtColor(
                                    save_input_img.astype(np.uint8),
                                    cv2.COLOR_YCR_CB2BGR)), crop_border)
                        save_output_img = np.zeros_like(save_input_img)
                        save_output_img[:, :, 0] = output_img
                        save_output_img[:, :, 1] = img_cr[
                            crop_border:-crop_border,
                            crop_border:-crop_border]
                        save_output_img[:, :, 2] = img_cb[
                            crop_border:-crop_border,
                            crop_border:-crop_border]
                        save_output_img = ut.post_process(
                            cv2.cvtColor(
                                save_output_img.astype(np.uint8),
                                cv2.COLOR_YCR_CB2BGR))

                    cv2.imwrite(
                        os.path.join(
                            save_folder,
                            "test_input_{:05d}.png".format(count + 1)),
                        save_input_img)
                    cv2.imwrite(
                        os.path.join(
                            save_folder,
                            "test_output_{:05d}.png".format(count + 1)),
                        save_output_img)
                    count += 1

                PSNR /= count
                SSIM /= count
                return PSNR, SSIM

            def save():
                """Snapshots a model."""
                if not os.path.isdir(save_folder):
                    os.makedirs(save_folder)
                    config_file = os.path.join(save_folder, "conf.json")
                    environ_file = os.path.join(save_folder, "env.json")
                    with open(config_file, "w") as f:
                        f.write(config.to_json())
                    with open(environ_file, "w") as f:
                        f.write(environ.to_json())
                log.info("Saving to {}".format(save_folder))
                saver.save(
                    sess,
                    os.path.join(save_folder, "model.ckpt"),
                    global_step=m.global_step)

            def train():
                """Train loop."""
                lr = config.base_learn_rate
                lr_decay_steps = config.lr_decay_steps
                max_train_iter = config.max_train_iter
                m.assign_lr(sess, lr)

                if environ.verbose:
                    loop = range(max_train_iter)
                else:
                    loop = pb.get(max_train_iter)

                for niter in loop:
                    # decrease learning rate
                    if len(lr_decay_steps) > 0:
                        if (niter + 1) == lr_decay_steps[0]:
                            lr *= 0.1
                            m.assign_lr(sess, lr)
                            lr_decay_steps.pop(0)
                    l2_loss = train_step()
                    if (niter + 1) % config.disp_iter == 0 or niter == 0:
                        exp_logger.log_train_loss(niter, l2_loss)
                    if (niter + 1) % config.valid_iter == 0 or niter == 0:
                        log.info("Experiment ID {}".format(environ.exp_id))
                        test_iter.reset()
                        psnr, ssim = evaluate(test_iter, -1)
                        exp_logger.log_valid_psnr(niter, psnr)
                        exp_logger.log_valid_ssim(niter, ssim)
                    if (niter + 1) % config.save_iter == 0:
                        save()

                test_iter.reset()
                psnr, ssim = evaluate(test_iter, -1)
                return psnr, ssim

            psnr, ssim = train()
    return psnr, ssim
conv2_fmaps = 64
conv2_ksize = 3
conv2_stride = 1
conv2_pad = "SAME"
conv2_dropout_rate = 0.25

pool3_fmaps = conv2_fmaps

n_fc1 = 128
fc1_dropout_rate = 0.5

n_outputs = 10

reset_graph()

with tf.device('/GPU:0'):
    with tf.name_scope("inputs"):
        X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
        X_reshaped = tf.reshape(X, shape=[-1, height, width, channels])
        y = tf.placeholder(tf.int32, shape=[None], name="y")
        training = tf.placeholder_with_default(False, shape=[],
                                               name='training')

    conv1 = tf.layers.conv2d(X_reshaped, filters=conv1_fmaps,
                             kernel_size=conv1_ksize,
                             strides=conv1_stride, padding=conv1_pad,
                             activation=tf.nn.relu, name="conv1")
    conv2 = tf.layers.conv2d(conv1, filters=conv2_fmaps,
                             kernel_size=conv2_ksize,
                             strides=conv2_stride, padding=conv2_pad,
                             activation=tf.nn.relu, name="conv2")

    with tf.name_scope("pool3"):
def create_train_op(model_config, inputs, opt, num_gpus=1, histograms=False):
    with tf.get_default_graph().as_default(), tf.device('/cpu:0'):
        tower_grads = []
        model = None
        losses = []
        total_loss = []
        global_step = slim.get_or_create_global_step()
        with tf.variable_scope(tf.get_variable_scope()):
            for i in xrange(num_gpus):
                with tf.device('/gpu:%d' % i):
                    with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
                        # Calculate the loss for one tower of the CIFAR
                        # model. This function constructs the entire CIFAR
                        # model but shares the variables across all towers.
                        losses, total_loss, model = tower_loss(
                            model_config, inputs, scope, is_train=True)

                        # Reuse variables for the next tower.
                        tf.get_variable_scope().reuse_variables()

                        # Calculate the gradients for the batch of data on
                        # this CIFAR tower.
                        grads = opt.compute_gradients(total_loss)

                        # Keep track of the gradients across all towers.
                        tower_grads.append(grads)

        summaries = []
        for l in losses + [total_loss]:
            # Remove 'tower_[0-9]/' from the name in case this is a
            # multi-GPU training session. This helps the clarity of
            # presentation on tensorboard.
            loss_name = l.op.name
            loss_summary = tf.summary.scalar(loss_name, l)
            summaries.append(loss_summary)

        # We must calculate the mean of each gradient. Note that this is the
        # synchronization point across all towers.
        grads = average_gradients(tower_grads)

        # Add histograms for gradients.
        if histograms:
            for grad, var in grads:
                if grad is not None:
                    summaries.append(
                        tf.summary.histogram(var.op.name + '/gradients',
                                             grad))

        # Apply the gradients to adjust the shared variables.
        apply_gradient_op = opt.apply_gradients(grads,
                                                global_step=global_step)

        # Add histograms for trainable variables.
        if histograms:
            for var in tf.trainable_variables():
                summaries.append(tf.summary.histogram(var.op.name, var))

        # Track the moving averages of all trainable variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())

        # Group all updates into a single train op.
        train_op = tf.group(apply_gradient_op, variables_averages_op)

        with tf.name_scope('train_op'):
            # Ensure the train_tensor computes grad_updates.
            train_op = with_dependencies([train_op], total_loss)

        return train_op, model, summaries
def __init__(self, conf, num_quantized_chars):
    self.input_x_d = tf.placeholder(tf.int32,
                                    [None, conf.max_char_length_d],
                                    name="input_x_d")
    self.input_x_se = tf.placeholder(tf.int32,
                                     [None, conf.max_char_length_s],
                                     name="input_x_se")
    self.training = tf.placeholder(tf.int32, name="trainable")
    self.list_d_s = [self.input_x_d, self.input_x_se]
    if self.training == 0:
        TRAIN = False
    else:
        TRAIN = True
    self.l2_loss = tf.constant(0.0)
    self.W0 = tf.get_variable("W",
                              [num_quantized_chars, conf.embedding_size])
    self.all_conv_1 = []
    norm = tf.random_normal_initializer(stddev=0.1)

    for i in range(len(self.list_d_s)):
        with tf.variable_scope('%d' % (i)):
            with tf.device('/cpu:0'), tf.name_scope("embedding"):
                self.embedded_characters = tf.nn.embedding_lookup(
                    self.W0, self.list_d_s[i])
                self.embedded_characters_expanded = tf.expand_dims(
                    self.embedded_characters, -1, name="embedding_input")
            with tf.variable_scope('layer_0'):
                filter_shape0 = [conf.filter_size, conf.embedding_size,
                                 1, 64]
                strides0 = [1, 1, conf.embedding_size, 1]
                self.filter_0 = tf.get_variable('filter1', filter_shape0,
                                                initializer=norm)
                self.h0 = Conv(self.embedded_characters_expanded,
                               self.filter_0, strides0, TRAIN, 'layer_0')
            self.all_conv_1.append(self.h0)
            '''
            with tf.variable_scope('layer_1-2'):
                self.h1 = Convolutional_Block(self.h0, 64, TRAIN,
                                              'layer_1-2')
                #self.pooled_1 = tf.nn.max_pool(
                #    self.h1, ksize=[1, conf.filter_size, 1, 1],
                #    strides=[1, 2, 1, 1], padding='SAME', name="pool1")
            self.all_conv_1.append(self.h1)
            '''

    with tf.name_scope('att_layer_3-8'):
        # part of domain classification
        # attention_1
        A = distance(self.all_conv_1[0], self.all_conv_1[1])
        #print(type(self.h1.shape[3]), type(self.all_conv_1[1].shape[1]))
        A_4_input, B_4_input = attention_process_1(
            A, self.all_conv_1[0], self.all_conv_1[1], 'w1_0', 'w1_1')
        A_4_feature, _, _ = Convolutional_Block(A_4_input, 64, None, None,
                                                TRAIN, 'a1-cnn')
        B_4_feature, _, _ = Convolutional_Block(B_4_input, 64, None, None,
                                                TRAIN, 'b1-cnn')
        A_1 = distance(A_4_feature, B_4_feature)
        xs1_conv1_aten, xs2_conv1_aten = attention_process_2(
            A_1, A_4_feature, B_4_feature, 64)
        self.pooled_2_a = tf.squeeze(
            tf.squeeze(average_pool(xs1_conv1_aten, 'pooled_a4'), axis=1),
            axis=1)
        self.pooled_2_b = tf.squeeze(
            tf.squeeze(average_pool(xs2_conv1_aten, 'pooled_b4'), axis=1),
            axis=1)

        pooled_2_a = max_pool(xs1_conv1_aten, conf.filter_size, "pool2_a")
        pooled_2_b = max_pool(xs2_conv1_aten, conf.filter_size, "pool2_b")

        A_2 = distance(pooled_2_a, pooled_2_b)
        A_6_input, B_6_input = attention_process_1(
            A_2, pooled_2_a, pooled_2_b, 'w2_0', 'w2_1')
        A_6_feature, _, _ = Convolutional_Block(A_6_input, 128, None, None,
                                                TRAIN, 'a2-cnn')
        B_6_feature, _, _ = Convolutional_Block(B_6_input, 128, None, None,
                                                TRAIN, 'b2-cnn')
        A_3 = distance(A_6_feature, B_6_feature)
        xs1_conv2_aten, xs2_conv2_aten = attention_process_2(
            A_3, A_6_feature, B_6_feature, 128)
        self.pooled_2_a = tf.squeeze(
            tf.squeeze(average_pool(xs1_conv2_aten, 'pooled_a4'), axis=1),
            axis=1)
        self.pooled_2_b = tf.squeeze(
            tf.squeeze(average_pool(xs2_conv2_aten, 'pooled_b4'), axis=1),
            axis=1)
        #pooled_3_a = max_pool(xs1_conv2_aten, conf.filter_size, "pool3_a")
        #pooled_3_b = max_pool(xs2_conv2_aten, conf.filter_size, "pool3_b")
        '''
def train(): """Train CIFAR-10 for a number of steps.""" with tf.Graph().as_default(), tf.device('/cpu:0'): # Create a variable to count the number of train() calls. This equals the # number of batches processed * FLAGS.num_gpus. global_step = tf.get_variable( 'global_step', [], initializer=tf.constant_initializer(0), trainable=False) # Calculate the learning rate schedule. num_batches_per_epoch = (cifar10.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size) decay_steps = int(num_batches_per_epoch * cifar10.NUM_EPOCHS_PER_DECAY / FLAGS.num_gpus) # Decay the learning rate exponentially based on the number of steps. lr = tf.train.exponential_decay(cifar10.INITIAL_LEARNING_RATE, global_step, decay_steps, cifar10.LEARNING_RATE_DECAY_FACTOR, staircase=True) # Create an optimizer that performs gradient descent. opt = tf.train.GradientDescentOptimizer(lr) if(FLAGS.iter_size > 1): opt = tp.optimizer.AccumGradOptimizer(opt, FLAGS.iter_size) # Get images and labels for CIFAR-10. images, labels = cifar10.distorted_inputs() batch_queue = tf.contrib.slim.prefetch_queue.prefetch_queue( [images, labels], capacity=2 * FLAGS.num_gpus) # Calculate the gradients for each model tower. tower_grads = [] with tf.variable_scope(tf.get_variable_scope()): for i in xrange(FLAGS.num_gpus): with tf.device('/gpu:%d' % i): with tf.name_scope('%s_%d' % (cifar10.TOWER_NAME, i)) as scope: # Dequeues one batch for the GPU image_batch, label_batch = batch_queue.dequeue() # Calculate the loss for one tower of the CIFAR model. This function # constructs the entire CIFAR model but shares the variables across # all towers. loss = tower_loss(scope, image_batch, label_batch) # Reuse variables for the next tower. tf.get_variable_scope().reuse_variables() # Retain the summaries from the final tower. summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope) # Calculate the gradients for the batch of data on this CIFAR tower. grads = opt.compute_gradients(loss) # Keep track of the gradients across all towers. tower_grads.append(grads) # We must calculate the mean of each gradient. Note that this is the # synchronization point across all towers. grads = average_gradients(tower_grads) # Add a summary to track the learning rate. summaries.append(tf.summary.scalar('learning_rate', lr)) # Add histograms for gradients. for grad, var in grads: if grad is not None: summaries.append(tf.summary.histogram(var.op.name + '/gradients', grad)) apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) # Add histograms for trainable variables. for var in tf.trainable_variables(): summaries.append(tf.summary.histogram(var.op.name, var)) # Track the moving averages of all trainable variables. variable_averages = tf.train.ExponentialMovingAverage( cifar10.MOVING_AVERAGE_DECAY, global_step) variables_averages_op = variable_averages.apply(tf.trainable_variables()) # Group all updates to into a single train op. train_op = tf.group(apply_gradient_op, variables_averages_op) # Create a saver. saver = tf.train.Saver(tf.global_variables()) # Build the summary operation from the last tower summaries. summary_op = tf.summary.merge(summaries) # Build an initialization operation to run below. init = tf.global_variables_initializer() # Start running operations on the Graph. allow_soft_placement must be set to # True to build towers on GPU, as some of the ops do not have GPU # implementations. 
config=tf.ConfigProto( allow_soft_placement=True, log_device_placement=FLAGS.log_device_placement) # only allocate needed memory size config.gpu_options.allow_growth=True # visible gpu number #config.gpu_options.visible_device_list='1,2,3,4' # gpu memory usage restriction ratio #config.gpu_options.per_process_gpu_memory_fraction=0.9 sess = tf.Session(config=config) sess.run(init) # Start the queue runners. tf.train.start_queue_runners(sess=sess) summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph) for step in xrange(FLAGS.max_steps): start_time = time.time() _, loss_value = sess.run([train_op, loss]) duration = time.time() - start_time assert not np.isnan(loss_value), 'Model diverged with loss = NaN' if step % 10 == 0: num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus * FLAGS.iter_size examples_per_sec = num_examples_per_step / duration sec_per_batch = duration / (FLAGS.num_gpus * FLAGS.iter_size) format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f ' 'sec/batch)') print (format_str % (datetime.now(), step, loss_value, examples_per_sec, sec_per_batch)) print ('LR:%s' % (sess.run(lr))) print ('Global Step:%s' % (sess.run(global_step))) if step % 100 == 0: summary_str = sess.run(summary_op) summary_writer.add_summary(summary_str, step) # Save the model checkpoint periodically. if step % 1000 == 0 or (step + 1) == FLAGS.max_steps: checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt') saver.save(sess, checkpoint_path, global_step=step)
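# train() above calls average_gradients() without defining it. A minimal
# sketch, assuming each element of tower_grads is the [(grad, var), ...] list
# returned by opt.compute_gradients() for one tower (this mirrors the
# canonical CIFAR-10 multi-GPU helper, not necessarily the authors' exact code):
def average_gradients(tower_grads):
    average_grads = []
    # zip(*tower_grads) groups the (grad, var) pairs that belong to the same
    # variable across all towers.
    for grad_and_vars in zip(*tower_grads):
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, 0), 0)
        # Variables are shared between towers, so the first tower's var suffices.
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads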
def _get_target_action(self, vector_input): vector_input = self.cast(vector_input) with tf.device(self.device): target_mu = self.actor_target_net(vector_input) return tf.clip_by_value(target_mu + self.action_noise(), -1, 1)
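# _get_target_action() above assumes self.action_noise is a callable that
# returns exploration noise shaped like the action. A minimal sketch, assuming
# plain Gaussian noise (the real noise process is not shown in this excerpt;
# the class name and stddev are illustrative):
class GaussianActionNoise:
    def __init__(self, action_dim, stddev=0.1):
        self.action_dim = action_dim
        self.stddev = stddev

    def __call__(self):
        # Zero-mean Gaussian noise, one value per action dimension.
        return tf.random.normal([self.action_dim], stddev=self.stddev)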
def alltoall_ring(xs, devices, split_axis, concat_axis): """MPI alltoall operation. Performance-optimized for a ring of devices. Args: xs: a list of n tf.Tensors devices: a list of n strings split_axis: an integer concat_axis: an integer Returns: a list of n Tensors """ n = len(xs) if n == 1: return xs # set up # [target, source] parts = [[None] * n for i in xrange(n)] def my_split(x, size_splits): total_size = tf.shape(x)[split_axis] part_size = total_size // sum(size_splits) return tf.split(x, [s * part_size for s in size_splits], axis=split_axis) forward_message_size = (n - 1) // 2 backward_message_size = (n - 1) - forward_message_size forward_messages = [None] * n backward_messages = [None] * n for i in xrange(n): with tf.device(devices[i]): if i >= backward_message_size: a, b, c, d = my_split(xs[i], [ i - backward_message_size, backward_message_size, 1, n - i - 1 ]) backward_messages[i] = b parts[i][i] = c forward_messages[i] = tf.concat([d, a], axis=split_axis) else: a, b, c, d = my_split( xs[i], [i, 1, forward_message_size, backward_message_size - i]) backward_messages[i] = tf.concat([d, a], axis=split_axis) parts[i][i] = b forward_messages[i] = c for step in xrange(1, max(forward_message_size, backward_message_size) + 1): new_forward_messages = [None] * n new_backward_messages = [None] * n for i in xrange(n): with tf.device(devices[i]): if forward_message_size > 0: parts[i][(i - step) % n], new_forward_messages[i] = my_split( forward_messages[(i - 1) % n], [1, forward_message_size - 1]) if backward_message_size > 0: new_backward_messages[i], parts[i][ (i + step) % n] = my_split( backward_messages[(i + 1) % n], [backward_message_size - 1, 1]) forward_message_size -= 1 backward_message_size -= 1 forward_messages = new_forward_messages backward_messages = new_backward_messages return mtf.parallel(devices, tf.concat, parts, axis=[concat_axis] * n)
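# A usage sketch for alltoall_ring(): each device contributes one tensor, each
# tensor is split n ways along split_axis, and device i ends up with part i of
# every peer's tensor, concatenated along concat_axis. Shapes and device
# strings below are illustrative; the split axis must be divisible by n.
devices = ['/gpu:0', '/gpu:1', '/gpu:2', '/gpu:3']
xs = []
for dev in devices:
    with tf.device(dev):
        xs.append(tf.random_normal([8, 16]))
ys = alltoall_ring(xs, devices, split_axis=0, concat_axis=0)  # 4 tensors of shape [8, 16]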
batch_size = 128
embedding_size = 128
skip_window = 1
num_skips = 2
valid_size = 16
valid_window = 100
valid_examples = np.random.choice(valid_window, valid_size, replace=False)
num_sampled = 64

graph = tf.Graph()
with graph.as_default():
    train_inputs = tf.placeholder(tf.int32, shape=[batch_size])
    train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
    valid_dataset = tf.constant(valid_examples, dtype=tf.int32)
    # Tutorial-style word2vec pins the embedding ops to the CPU.
    with tf.device('/cpu:0'):
        embeddings = tf.Variable(
            tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
        embed = tf.nn.embedding_lookup(embeddings, train_inputs)
        nce_weights = tf.Variable(
            tf.truncated_normal([vocabulary_size, embedding_size],
                                stddev=1.0 / math.sqrt(embedding_size)))
        nce_biases = tf.Variable(tf.zeros([vocabulary_size]))
    # The original is cut off mid-call; completed here with the standard
    # word2vec NCE arguments.
    loss = tf.reduce_mean(
        tf.nn.nce_loss(weights=nce_weights,
                       biases=nce_biases,
                       labels=train_labels,
                       inputs=embed,
                       num_sampled=num_sampled,
                       num_classes=vocabulary_size))
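# The graph above stops at the loss. A sketch of the standard word2vec
# continuation (an optimizer plus a cosine-similarity probe on the held-out
# words); this would sit inside the same `with graph.as_default():` block and
# assumes the names defined above:
optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss)
norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
normalized_embeddings = embeddings / norm
valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings, valid_dataset)
similarity = tf.matmul(valid_embeddings, normalized_embeddings, transpose_b=True)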
idx = tf.slice(outi, [0, 0, 0], [-1, -1, k]) val = tf.slice(out, [0, 0, 0], [-1, -1, k]) print((idx, val)) #val, idx = tf.nn.top_k(-dist, k=k) # ONLY SUPPORT CPU return val, idx if __name__ == '__main__': knn = True import numpy as np import time np.random.seed(100) pts = np.random.random((32, 512, 64)).astype('float32') tmp1 = np.random.random((32, 512, 3)).astype('float32') tmp2 = np.random.random((32, 128, 3)).astype('float32') with tf.device('/gpu:1'): points = tf.constant(pts) xyz1 = tf.constant(tmp1) xyz2 = tf.constant(tmp2) radius = 0.1 nsample = 64 if knn: _, idx = knn_point(nsample, xyz1, xyz2) grouped_points = group_point(points, idx) else: idx, _ = query_ball_point(radius, nsample, xyz1, xyz2) grouped_points = group_point(points, idx) #grouped_points_grad = tf.ones_like(grouped_points) #points_grad = tf.gradients(grouped_points, points, grouped_points_grad) with tf.Session('') as sess: now = time.time()
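# The benchmark above is cut off right after `now = time.time()`. A sketch of
# how such a timing loop typically finishes (the iteration count is
# illustrative); this would sit inside the `with tf.Session('') as sess:` block:
for _ in range(100):
    ret = sess.run(grouped_points)
print('time per run: %f s' % ((time.time() - now) / 100))
print(ret.shape)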
def _build_word_char_embeddings(self): ''' options contains key 'char_cnn': { 'n_characters': 262, # includes the start / end characters 'max_characters_per_token': 50, 'filters': [ [1, 32], [2, 32], [3, 64], [4, 128], [5, 256], [6, 512], [7, 512] ], 'activation': 'tanh', # for the character embedding 'embedding': {'dim': 16} # for highway layers # if omitted, then no highway layers 'n_highway': 2, } ''' projection_dim = self.options['lstm']['projection_dim'] cnn_options = self.options['char_cnn'] filters = cnn_options['filters'] n_filters = sum(f[1] for f in filters) max_chars = cnn_options['max_characters_per_token'] char_embed_dim = cnn_options['embedding']['dim'] n_chars = cnn_options['n_characters'] if n_chars != 262: raise InvalidNumberOfCharacters( "Set n_characters=262 after training see the README.md") if cnn_options['activation'] == 'tanh': activation = tf.nn.tanh elif cnn_options['activation'] == 'relu': activation = tf.nn.relu # the character embeddings with tf.device("/cpu:0"): self.embedding_weights = tf.get_variable( "char_embed", [n_chars, char_embed_dim], dtype=DTYPE, initializer=tf.random_uniform_initializer(-1.0, 1.0)) # shape (batch_size, unroll_steps, max_chars, embed_dim) self.char_embedding = tf.nn.embedding_lookup( self.embedding_weights, self.ids_placeholder) # the convolutions def make_convolutions(inp): with tf.variable_scope('CNN') as scope: convolutions = [] for i, (width, num) in enumerate(filters): if cnn_options['activation'] == 'relu': # He initialization for ReLU activation # with char embeddings init between -1 and 1 #w_init = tf.random_normal_initializer( # mean=0.0, # stddev=np.sqrt(2.0 / (width * char_embed_dim)) #) # Kim et al 2015, +/- 0.05 w_init = tf.random_uniform_initializer(minval=-0.05, maxval=0.05) elif cnn_options['activation'] == 'tanh': # glorot init w_init = tf.random_normal_initializer( mean=0.0, stddev=np.sqrt(1.0 / (width * char_embed_dim))) w = tf.get_variable("W_cnn_%s" % i, [1, width, char_embed_dim, num], initializer=w_init, dtype=DTYPE) b = tf.get_variable( "b_cnn_%s" % i, [num], dtype=DTYPE, initializer=tf.constant_initializer(0.0)) conv = tf.nn.conv2d( inp, w, strides=[1, 1, 1, 1], padding="VALID") + b # now max pool conv = tf.nn.max_pool(conv, [1, 1, max_chars - width + 1, 1], [1, 1, 1, 1], 'VALID') # activation conv = activation(conv) conv = tf.squeeze(conv, squeeze_dims=[2]) convolutions.append(conv) return tf.concat(convolutions, 2) embedding = make_convolutions(self.char_embedding) # for highway and projection layers n_highway = cnn_options.get('n_highway') use_highway = n_highway is not None and n_highway > 0 use_proj = n_filters != projection_dim if use_highway or use_proj: # reshape from (batch_size, n_tokens, dim) to (-1, dim) batch_size_n_tokens = tf.shape(embedding)[0:2] embedding = tf.reshape(embedding, [-1, n_filters]) # set up weights for projection if use_proj: assert n_filters > projection_dim with tf.variable_scope('CNN_proj') as scope: W_proj_cnn = tf.get_variable( "W_proj", [n_filters, projection_dim], initializer=tf.random_normal_initializer( mean=0.0, stddev=np.sqrt(1.0 / n_filters)), dtype=DTYPE) b_proj_cnn = tf.get_variable( "b_proj", [projection_dim], initializer=tf.constant_initializer(0.0), dtype=DTYPE) # apply highways layers def high(x, ww_carry, bb_carry, ww_tr, bb_tr): carry_gate = tf.nn.sigmoid(tf.matmul(x, ww_carry) + bb_carry) transform_gate = tf.nn.relu(tf.matmul(x, ww_tr) + bb_tr) return carry_gate * transform_gate + (1.0 - carry_gate) * x if use_highway: highway_dim = n_filters for i in 
range(n_highway): with tf.variable_scope('CNN_high_%s' % i) as scope: W_carry = tf.get_variable( 'W_carry', [highway_dim, highway_dim], # glorot init initializer=tf.random_normal_initializer( mean=0.0, stddev=np.sqrt(1.0 / highway_dim)), dtype=DTYPE) b_carry = tf.get_variable( 'b_carry', [highway_dim], initializer=tf.constant_initializer(-2.0), dtype=DTYPE) W_transform = tf.get_variable( 'W_transform', [highway_dim, highway_dim], initializer=tf.random_normal_initializer( mean=0.0, stddev=np.sqrt(1.0 / highway_dim)), dtype=DTYPE) b_transform = tf.get_variable( 'b_transform', [highway_dim], initializer=tf.constant_initializer(0.0), dtype=DTYPE) embedding = high(embedding, W_carry, b_carry, W_transform, b_transform) # finally project down if needed if use_proj: embedding = tf.matmul(embedding, W_proj_cnn) + b_proj_cnn # reshape back to (batch_size, tokens, dim) if use_highway or use_proj: shp = tf.concat([batch_size_n_tokens, [projection_dim]], axis=0) embedding = tf.reshape(embedding, shp) # at last assign attributes for remainder of the model self.embedding = embedding
def __init__(self, sequence_length, num_classes, vocab_size, emd_dim, filter_sizes, num_filters, l2_reg_lambda=0.0, batch_size=32, reference_size=16, dropout_keep_prob=.75): # Placeholders for input, output and dropout self.input_x = tf.placeholder(tf.int32, [batch_size, sequence_length], name="input_x") self.input_ref = tf.placeholder(tf.int32, [reference_size, sequence_length], name="input_ref") self.input_y = tf.placeholder(tf.float32, [batch_size, num_classes], name="input_y") self.dropout_keep_prob = dropout_keep_prob # Keeping track of l2 regularization loss (optional) l2_loss = tf.constant(0.0) with tf.variable_scope('discriminator'): # Embedding layer with tf.device('/cpu:0'), tf.name_scope("embedding"): self.W = tf.Variable(tf.random_uniform([vocab_size, emd_dim], -1.0, 1.0), name="W") self.embedded_chars = tf.nn.embedding_lookup( self.W, self.input_x) self.embedded_chars_expanded = tf.expand_dims( self.embedded_chars, -1) self.embedded_chars_ref = tf.nn.embedding_lookup( self.W, self.input_ref) self.embedded_chars_expanded_ref = tf.expand_dims( self.embedded_chars_ref, -1) # Create a convolution + maxpool layer for each filter size pooled_outputs = [] pooled_outputs_ref = [] for filter_size, num_filter in zip(filter_sizes, num_filters): with tf.name_scope("conv-maxpool-%s" % filter_size): # Convolution Layer filter_shape = [filter_size, emd_dim, 1, num_filter] W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W") b = tf.Variable(tf.constant(0.1, shape=[num_filter]), name="b") conv = tf.nn.conv2d(self.embedded_chars_expanded, W, strides=[1, 1, 1, 1], padding="VALID", name="conv") conv_ref = tf.nn.conv2d(self.embedded_chars_expanded_ref, W, strides=[1, 1, 1, 1], padding="VALID", name="conv_ref") # Apply nonlinearity h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu") h_ref = tf.nn.relu( tf.nn.bias_add(conv_ref, b, name="relu_ref")) # Maxpooling over the outputs pooled = tf.nn.max_pool( h, ksize=[1, sequence_length - filter_size + 1, 1, 1], strides=[1, 1, 1, 1], padding='VALID', name="pool") pooled_ref = tf.nn.max_pool( h_ref, ksize=[1, sequence_length - filter_size + 1, 1, 1], strides=[1, 1, 1, 1], padding='VALID', name="pool_ref") pooled_outputs.append(pooled) pooled_outputs_ref.append(pooled_ref) # Combine all the pooled features num_filters_total = sum(num_filters) self.h_pool = tf.concat(pooled_outputs, 3) self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total]) self.h_pool_ref = tf.concat(pooled_outputs_ref, 3) self.h_pool_flat_ref = tf.reshape(self.h_pool_ref, [-1, num_filters_total]) # Add highway with tf.name_scope("highway"): self.h_highway = highway(self.h_pool_flat, self.h_pool_flat.get_shape()[1], 1, 0, scope="highway") self.h_highway_ref = highway( self.h_pool_flat_ref, self.h_pool_flat_ref.get_shape()[1], 1, 0, scope="highway") # Add dropout with tf.name_scope("dropout"): self.h_drop = tf.nn.dropout(self.h_highway, self.dropout_keep_prob) self.h_drop_ref = tf.nn.dropout(self.h_highway_ref, self.dropout_keep_prob) # Final (unnormalized) scores and predictions with tf.name_scope("output"): """ scores = tf.TensorArray(dtype=tf.float32, size=batch_size, dynamic_size=False, infer_shape=True) def rank_recurrence(i, scores): rank_score = get_rank_score(tf.nn.embedding_lookup(self.h_drop, i), self.h_drop_ref) scores = scores.write(i, rank_score) return i + 1, scores _, self.scores = control_flow_ops.while_loop( cond=lambda i, _1: i < batch_size, body=rank_recurrence, loop_vars=(tf.constant(0, dtype=tf.int32), scores) ) """ score = [] """ for i 
in range(batch_size): value = tf.constant(0.0, dtype=tf.float32) for j in range(reference_size): value += cosine_distance(tf.nn.embedding_lookup(self.h_drop, i), tf.nn.embedding_lookup(self.h_drop_ref, j)) score.append(value) self.scores = tf.stack(score) self.scores = tf.reshape(self.scores, [-1]) """ self.reference = tf.reduce_mean(tf.nn.l2_normalize( self.h_drop_ref, axis=-1), axis=0, keep_dims=True) self.feature = tf.nn.l2_normalize(self.h_drop, axis=-1) self.scores = tf.reshape( self.feature @ tf.transpose(self.reference, perm=[1, 0]), [-1]) self.ypred_for_auc = tf.reshape(tf.nn.softmax(self.scores), [-1]) self.log_score = tf.log(self.ypred_for_auc) # CalculateMean cross-entropy loss with tf.name_scope("loss"): self.neg_vec = tf.nn.embedding_lookup( tf.transpose(self.input_y), 1) self.pos_vec = tf.nn.embedding_lookup( tf.transpose(self.input_y), 0) losses_minus = self.log_score * self.neg_vec losses_posit = self.log_score * self.pos_vec self.loss = (-tf.reduce_sum(losses_minus) / tf.maximum(tf.reduce_sum(self.neg_vec), 1e-5) + tf.reduce_sum(losses_posit) / tf.maximum(tf.reduce_sum(self.pos_vec), 1e-5) ) / reference_size self.params = [ param for param in tf.trainable_variables() if 'discriminator' in param.name ] d_optimizer = tf.train.AdamOptimizer(1e-4) grads_and_vars = d_optimizer.compute_gradients(self.loss, self.params, aggregation_method=2) self.train_op = d_optimizer.apply_gradients(grads_and_vars) from ...models.rankgan import SAVING_PATH SavableModel.__init__(self, self.params, SAVING_PATH, 'discriminator')
def get_model(self, num_classes, activation='sigmoid'): max_len = opt.max_len max_ngram_len = opt.ngram_max_len voca_size = opt.unigram_hash_size + 1 with tf.device('/gpu:0'): def LAYER(input1, input2, max_len=max_len): Avg = Dropout(rate=0.5)(input1) Avg = BatchNormalization()(Avg) Avg = GlobalAveragePooling1D()(Avg) mat = Reshape((max_len, 1))(input2) Dot = dot([input1, mat], axes=1) Dot = Flatten()(Dot) Dot = Dropout(rate=0.5)(Dot) Dot = BatchNormalization()(Dot) return Avg, Dot embd = Embedding(voca_size, opt.embd_size, name='uni_embd') #################################### uni = Input((max_len, ), name="t_uni") uni_embd = embd(uni) # token w_uni = Input((max_len, ), name="w_uni") #################################### shape = Input((max_len, ), name="shape") shape_embd = embd(shape) w_shape = Input((max_len, ), name="w_shape") #################################### noun = Input((max_len, ), name="noun") noun_embd = embd(noun) w_noun = Input((max_len, ), name="w_noun") #################################### bmm = Input((max_len, ), name="bmm") bmm_embd = embd(bmm) w_bmm = Input((max_len, ), name="w_bmm") #################################### ngram = Input((max_ngram_len, ), name="ngram") ngram_embd = embd(ngram) w_ngram = Input((max_ngram_len, ), name="w_ngram") #################################### jamo3 = Input((max_len, ), name="jamo3") jamo_embd3 = embd(jamo3) w_jamo3 = Input((max_len, ), name="w_jamo3") #################################### jamo2 = Input((max_len, ), name="jamo2") jamo_embd2 = embd(jamo2) w_jamo2 = Input((max_len, ), name="w_jamo2") #################################### jamo1 = Input((max_len, ), name="jamo1") jamo_embd1 = embd(jamo1) w_jamo1 = Input((max_len, ), name="w_jamo1") #################################### img = Input((2048, ), name="image") uni_avg, uni_dot = LAYER(uni_embd, w_uni, max_len=max_len) shape_avg, shape_dot = LAYER(shape_embd, w_shape, max_len=max_len) noun_avg, noun_dot = LAYER(noun_embd, w_noun, max_len=max_len) ngram_avg, ngram_dot = LAYER(ngram_embd, w_ngram, max_len=max_ngram_len) jamo_avg3, jamo_dot3 = LAYER(jamo_embd3, w_jamo3, max_len=max_len) jamo_avg2, jamo_dot2 = LAYER(jamo_embd2, w_jamo2, max_len=max_len) jamo_avg1, jamo_dot1 = LAYER(jamo_embd1, w_jamo1, max_len=max_len) bmm_avg, bmm_dot = LAYER(bmm_embd, w_bmm, max_len=max_len) result = Concatenate()([ uni_avg, uni_dot, shape_avg, shape_dot, noun_avg, noun_dot, ngram_avg, ngram_dot, jamo_dot3, jamo_dot2, jamo_dot1, bmm_dot, img ]) result = Dropout(rate=0.5)(result) result = BatchNormalization()(result) result = Activation('relu')(result) outputs = Dense(num_classes, activation=activation)(result) #################################### model = Model(inputs=[ uni, w_uni, shape, w_shape, noun, w_noun, bmm, w_bmm, ngram, w_ngram, jamo3, w_jamo3, jamo2, w_jamo2, jamo1, w_jamo1, img ], outputs=outputs) optm = keras.optimizers.adam(0.0002) model.compile(loss='categorical_crossentropy', optimizer=optm, metrics=[top1_acc]) model.summary(print_fn=lambda x: self.logger.info(x)) return model
def __init__(self, args, infer=False): self.args = args if infer: args.batch_size = 1 args.seq_length = 1 if args.model == 'rnn': cell_fn = rnn.BasicRNNCell elif args.model == 'gru': cell_fn = rnn.GRUCell elif args.model == 'lstm': cell_fn = rnn.BasicLSTMCell else: raise Exception("model type not supported: {}".format(args.model)) cells = [] for _ in range(args.num_layers): cell = cell_fn(args.rnn_size) cells.append(cell) self.cell = cell = rnn.MultiRNNCell(cells) self.input_data = tf.placeholder(tf.int32, [args.batch_size, args.seq_length]) self.targets = tf.placeholder(tf.int32, [args.batch_size, args.seq_length]) self.initial_state = cell.zero_state(args.batch_size, tf.float32) self.batch_pointer = tf.Variable(0, name="batch_pointer", trainable=False, dtype=tf.int32) self.inc_batch_pointer_op = tf.assign(self.batch_pointer, self.batch_pointer + 1) self.epoch_pointer = tf.Variable(0, name="epoch_pointer", trainable=False) self.batch_time = tf.Variable(0.0, name="batch_time", trainable=False) tf.summary.scalar("time_batch", self.batch_time) def variable_summaries(var): """Attach a lot of summaries to a Tensor (for TensorBoard visualization).""" with tf.name_scope('summaries'): mean = tf.reduce_mean(var) tf.summary.scalar('mean', mean) #with tf.name_scope('stddev'): # stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean))) #tf.summary.scalar('stddev', stddev) tf.summary.scalar('max', tf.reduce_max(var)) tf.summary.scalar('min', tf.reduce_min(var)) #tf.summary.histogram('histogram', var) with tf.variable_scope('rnnlm'): softmax_w = tf.get_variable("softmax_w", [args.rnn_size, args.vocab_size]) variable_summaries(softmax_w) softmax_b = tf.get_variable("softmax_b", [args.vocab_size]) variable_summaries(softmax_b) with tf.device("/cpu:0"): embedding = tf.get_variable("embedding", [args.vocab_size, args.rnn_size]) inputs = tf.split( tf.nn.embedding_lookup(embedding, self.input_data), args.seq_length, 1) inputs = [tf.squeeze(input_, [1]) for input_ in inputs] def loop(prev, _): prev = tf.matmul(prev, softmax_w) + softmax_b prev_symbol = tf.stop_gradient(tf.argmax(prev, 1)) return tf.nn.embedding_lookup(embedding, prev_symbol) outputs, last_state = legacy_seq2seq.rnn_decoder( inputs, self.initial_state, cell, loop_function=loop if infer else None, scope='rnnlm') output = tf.reshape(tf.concat(outputs, 1), [-1, args.rnn_size]) self.logits = tf.matmul(output, softmax_w) + softmax_b self.probs = tf.nn.softmax(self.logits) loss = legacy_seq2seq.sequence_loss_by_example( [self.logits], [tf.reshape(self.targets, [-1])], [tf.ones([args.batch_size * args.seq_length])], args.vocab_size) self.cost = tf.reduce_sum(loss) / args.batch_size / args.seq_length tf.summary.scalar("cost", self.cost) self.final_state = last_state self.lr = tf.Variable(0.0, trainable=False) tvars = tf.trainable_variables() grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), args.grad_clip) optimizer = tf.train.AdamOptimizer(self.lr) self.train_op = optimizer.apply_gradients(zip(grads, tvars))
def main(unused_argv): tf.logging.set_verbosity(tf.logging.INFO) # Set up deployment (i.e., multi-GPUs and/or multi-replicas). config = model_deploy.DeploymentConfig( num_clones=FLAGS.num_clones, clone_on_cpu=FLAGS.clone_on_cpu, replica_id=FLAGS.task, num_replicas=FLAGS.num_replicas, num_ps_tasks=FLAGS.num_ps_tasks) # Split the batch across GPUs. assert FLAGS.train_batch_size % config.num_clones == 0, ( 'Training batch size not divisible by number of clones (GPUs).') clone_batch_size = FLAGS.train_batch_size // config.num_clones tf.gfile.MakeDirs(FLAGS.train_logdir) tf.logging.info('Training on %s set', FLAGS.train_split) with tf.Graph().as_default() as graph: with tf.device(config.inputs_device()): dataset = data_generator.Dataset( dataset_name=FLAGS.dataset, split_name=FLAGS.train_split, dataset_dir=FLAGS.dataset_dir, batch_size=clone_batch_size, crop_size=[int(sz) for sz in FLAGS.train_crop_size], min_resize_value=FLAGS.min_resize_value, max_resize_value=FLAGS.max_resize_value, resize_factor=FLAGS.resize_factor, min_scale_factor=FLAGS.min_scale_factor, max_scale_factor=FLAGS.max_scale_factor, scale_factor_step_size=FLAGS.scale_factor_step_size, model_variant=FLAGS.model_variant, num_readers=4, is_training=True, should_shuffle=True, should_repeat=True) # Create the global step on the device storing the variables. with tf.device(config.variables_device()): global_step = tf.train.get_or_create_global_step() # Define the model and create clones. model_fn = _build_deeplab model_args = (dataset.get_one_shot_iterator(), { common.OUTPUT_TYPE: dataset.num_of_classes }, dataset.ignore_label) clones = model_deploy.create_clones( config, model_fn, args=model_args) # Gather update_ops from the first clone. These contain, for example, # the updates for the batch_norm variables created by model_fn. first_clone_scope = config.clone_scope(0) update_ops = tf.get_collection( tf.GraphKeys.UPDATE_OPS, first_clone_scope) # Gather initial summaries. summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES)) # Add summaries for model variables. for model_var in tf.model_variables(): summaries.add(tf.summary.histogram(model_var.op.name, model_var)) # Add summaries for images, labels, semantic predictions if FLAGS.save_summaries_images: summary_image = graph.get_tensor_by_name( ('%s/%s:0' % (first_clone_scope, common.IMAGE)).strip('/')) summaries.add( tf.summary.image('samples/%s' % common.IMAGE, summary_image)) first_clone_label = graph.get_tensor_by_name( ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/')) # Scale up summary image pixel values for better visualization. pixel_scaling = max(1, 255 // dataset.num_of_classes) summary_label = tf.cast( first_clone_label * pixel_scaling, tf.uint8) summaries.add( tf.summary.image('samples/%s' % common.LABEL, summary_label)) first_clone_output = graph.get_tensor_by_name( ('%s/%s:0' % (first_clone_scope, common.OUTPUT_TYPE)).strip('/')) predictions = tf.expand_dims(tf.argmax(first_clone_output, 3), -1) summary_predictions = tf.cast( predictions * pixel_scaling, tf.uint8) summaries.add( tf.summary.image( 'samples/%s' % common.OUTPUT_TYPE, summary_predictions)) # Add summaries for losses. for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope): summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss)) # Build the optimizer based on the device specification. 
with tf.device(config.optimizer_device()): learning_rate = train_utils.get_model_learning_rate( FLAGS.learning_policy, FLAGS.base_learning_rate, FLAGS.learning_rate_decay_step, FLAGS.learning_rate_decay_factor, FLAGS.training_number_of_steps, FLAGS.learning_power, FLAGS.slow_start_step, FLAGS.slow_start_learning_rate, decay_steps=FLAGS.decay_steps, end_learning_rate=FLAGS.end_learning_rate) summaries.add(tf.summary.scalar('learning_rate', learning_rate)) if FLAGS.optimizer == 'momentum': optimizer = tf.train.MomentumOptimizer( learning_rate, FLAGS.momentum) elif FLAGS.optimizer == 'adam': optimizer = tf.train.AdamOptimizer( learning_rate=FLAGS.adam_learning_rate, epsilon=FLAGS.adam_epsilon) else: raise ValueError('Unknown optimizer') if FLAGS.quantize_delay_step >= 0: if FLAGS.num_clones > 1: raise ValueError( 'Quantization doesn\'t support multi-clone yet.') contrib_quantize.create_training_graph( quant_delay=FLAGS.quantize_delay_step) startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps with tf.device(config.variables_device()): total_loss, grads_and_vars = model_deploy.optimize_clones( clones, optimizer) total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.') summaries.add(tf.summary.scalar('total_loss', total_loss)) # Modify the gradients for biases and last layer variables. last_layers = model.get_extra_layer_scopes( FLAGS.last_layers_contain_logits_only) grad_mult = train_utils.get_model_gradient_multipliers( last_layers, FLAGS.last_layer_gradient_multiplier) if grad_mult: grads_and_vars = slim.learning.multiply_gradients( grads_and_vars, grad_mult) # Create gradient update op. grad_updates = optimizer.apply_gradients( grads_and_vars, global_step=global_step) update_ops.append(grad_updates) update_op = tf.group(*update_ops) with tf.control_dependencies([update_op]): train_tensor = tf.identity(total_loss, name='train_op') # Add the summaries from the first clone. These contain the summaries # created by model_fn and either optimize_clones() or _gather_clone_loss(). summaries |= set( tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope)) # Merge all summaries together. summary_op = tf.summary.merge(list(summaries)) # Soft placement allows placing on CPU ops without GPU implementation. session_config = tf.ConfigProto( allow_soft_placement=True, log_device_placement=False) # Start the training. profile_dir = FLAGS.profile_logdir if profile_dir is not None: tf.gfile.MakeDirs(profile_dir) with contrib_tfprof.ProfileContext( enabled=profile_dir is not None, profile_dir=profile_dir): init_fn = None if FLAGS.tf_initial_checkpoint: init_fn = train_utils.get_model_init_fn( FLAGS.train_logdir, FLAGS.tf_initial_checkpoint, FLAGS.initialize_last_layer, last_layers, ignore_missing_vars=True) slim.learning.train( train_tensor, logdir=FLAGS.train_logdir, log_every_n_steps=FLAGS.log_steps, master=FLAGS.master, number_of_steps=FLAGS.training_number_of_steps, is_chief=(FLAGS.task == 0), session_config=session_config, startup_delay_steps=startup_delay_steps, init_fn=init_fn, summary_op=summary_op, save_summaries_secs=FLAGS.save_summaries_secs, save_interval_secs=FLAGS.save_interval_secs)
# The head of this snippet is cut off; from the later
# tf.split(input_gt_bbox, num_gpus) call it evidently defined the
# ground-truth box placeholder:
input_gt_bbox = tf.placeholder(tf.float32, [None, 1, 4])  # [batch, num_bbox, (y1, x1, y2, x2)]
image_splits = tf.split(input_image, num_gpus)
ratio_splits = tf.split(input_ratio, num_gpus)
gt_bbox_splits = tf.split(input_gt_bbox, num_gpus)
opt = tf.train.AdamOptimizer(0.001)
global_step = tf.Variable(0, name='global_step', trainable=False)
tower_grads = []
tower_loss = []
counter = 0
with tf.variable_scope(tf.get_variable_scope()):
    for d in range(num_gpus):
        with tf.device('/gpu:{}'.format(d)):
            with tf.name_scope('{}_{}'.format('tower', d)):
                loss = build_model(image_splits[counter],
                                   ratio_splits[counter],
                                   gt_bbox_splits[counter])
                tf.get_variable_scope().reuse_variables()
                counter += 1
                with tf.variable_scope("loss"):
                    grads_vars_all = opt.compute_gradients(loss)
                    tower_grads.append(grads_vars_all)
                    tower_loss.append(loss)
mean_loss = tf.stack(axis=0, values=tower_loss)
mean_loss = tf.reduce_mean(mean_loss, 0)
mean_grads = average_gradients(tower_grads)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
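# The snippet ends after collecting UPDATE_OPS; a minimal sketch of the usual
# next step, running the batch-norm updates together with the averaged
# gradient application (names are the ones defined above):
with tf.control_dependencies(update_ops):
    train_op = opt.apply_gradients(mean_grads, global_step=global_step)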
tokenizer, is_training=False) eval_examples = json.load(open(args.eval_dir1, 'r')) eval_data = json.load(open(args.eval_dir2, 'r')) eval_steps_per_epoch = len(eval_data) // (args.n_batch * n_gpu) eval_gen = data_generator(eval_data, args.n_batch * n_gpu, shuffle=False, drop_last=False) if len(eval_data) % (args.n_batch * n_gpu) != 0: eval_steps_per_epoch += 1 with tf.device("/gpu:0"): input_ids = tf.placeholder(tf.int32, shape=[None, args.max_seq_length], name='input_ids') input_masks = tf.placeholder(tf.float32, shape=[None, args.max_seq_length], name='input_masks') segment_ids = tf.placeholder(tf.int32, shape=[None, args.max_seq_length], name='segment_ids') start_positions = tf.placeholder(tf.int32, shape=[ None, ], name='start_positions') end_positions = tf.placeholder(tf.int32,
def build_summaries(self): # SUMMARIES with tf.device('/cpu:0'): for i in range(4): tf.summary.scalar('ssim_loss_' + str(i), self.ssim_loss_left[i] + self.ssim_loss_right[i], collections=self.model_collection) tf.summary.scalar('l1_loss_' + str(i), self.l1_reconstruction_loss_left[i] + self.l1_reconstruction_loss_right[i], collections=self.model_collection) tf.summary.scalar('image_loss_' + str(i), self.image_loss_left[i] + self.image_loss_right[i], collections=self.model_collection) tf.summary.scalar('disp_gradient_loss_' + str(i), self.disp_left_loss[i] + self.disp_right_loss[i], collections=self.model_collection) tf.summary.scalar('lr_loss_' + str(i), self.lr_left_loss[i] + self.lr_right_loss[i], collections=self.model_collection) tf.summary.image('disp_left_est_' + str(i), self.disp_left_est[i], max_outputs=4, collections=self.model_collection) tf.summary.image('disp_right_est_' + str(i), self.disp_right_est[i], max_outputs=4, collections=self.model_collection) if self.params.full_summary: tf.summary.image('left_est_' + str(i), self.left_est[i], max_outputs=4, collections=self.model_collection) tf.summary.image('right_est_' + str(i), self.right_est[i], max_outputs=4, collections=self.model_collection) tf.summary.image('ssim_left_' + str(i), self.ssim_left[i], max_outputs=4, collections=self.model_collection) tf.summary.image('ssim_right_' + str(i), self.ssim_right[i], max_outputs=4, collections=self.model_collection) tf.summary.image('l1_left_' + str(i), self.l1_left[i], max_outputs=4, collections=self.model_collection) tf.summary.image('l1_right_' + str(i), self.l1_right[i], max_outputs=4, collections=self.model_collection) if self.params.full_summary: tf.summary.image('left', self.left, max_outputs=4, collections=self.model_collection) tf.summary.image('right', self.right, max_outputs=4, collections=self.model_collection)
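# build_summaries() only registers ops into self.model_collection; a sketch of
# how such collection-scoped summaries are typically merged and written.
# Assumes self.model_collection holds a single collection name and that
# log_dir, feed, and step exist in the surrounding training loop (all
# illustrative, not the authors' exact code):
summary_op = tf.summary.merge_all(key=self.model_collection[0])
writer = tf.summary.FileWriter(log_dir, sess.graph)
summary_str = sess.run(summary_op, feed_dict=feed)
writer.add_summary(summary_str, step)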
# Careful with GPU memory allocation, TF never releases it. TF starts with almost
# all of the GPU memory allocated. We can slowly grow to that limit with an
# option setting:
config.gpu_options.allow_growth = True
sess_grow = tf.Session(config=config)

# Also, we can limit the size of GPU memory used, with the following option
config.gpu_options.per_process_gpu_memory_fraction = 0.4
sess_limited = tf.Session(config=config)

# How to set placements on multiple devices.
# Here, assume we have three devices: CPU:0, GPU:0, and GPU:1
if tf.test.is_built_with_cuda():
    with tf.device('/cpu:0'):
        a = tf.constant([1.0, 3.0, 5.0], shape=[1, 3])
        b = tf.constant([2.0, 4.0, 6.0], shape=[3, 1])
        # Use the two GPUs named in the comment above (the original pinned to
        # /gpu:1 and /gpu:2, which do not exist under that assumption).
        with tf.device('/gpu:0'):
            c = tf.matmul(a, b)
            c = tf.reshape(c, [-1])
        with tf.device('/gpu:1'):
            d = tf.matmul(b, a)
            flat_d = tf.reshape(d, [-1])
        # tf.mul was removed from the API; the element-wise product is tf.multiply.
        combined = tf.multiply(c, flat_d)
    # Reuse one of the sessions created above.
    print(sess_limited.run(combined))
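# Before pinning ops to '/gpu:0' or '/gpu:1' it is worth checking what is
# actually available; a small sketch using TF's local device listing:
from tensorflow.python.client import device_lib
print([d.name for d in device_lib.list_local_devices()])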
def main(argv=None):  # pylint: disable=unused-argument
    # Device strings need a leading slash: '/gpu:0', not 'gpu:0'.
    with tf.device('/gpu:0'):
        if tf.gfile.Exists(FLAGS.train_dir):
            tf.gfile.DeleteRecursively(FLAGS.train_dir)
        tf.gfile.MakeDirs(FLAGS.train_dir)
        train()
def __init__(self, config, device, loader, mode): self.config = config self.mode = mode if mode == "Train": self.is_training = True self.batch_size = self.config.train_batch_size self.maxstep_size = self.config.train_step_size reuse = None elif mode == "Valid": self.is_training = False self.batch_size = self.config.valid_batch_size reuse = True else: self.is_training = False self.batch_size = self.config.test_batch_size reuse = True self.hidden_size = hidden_size = config.hidden_size self.learning_rate = learning_rate = config.learning_rate opt = config.sgd_opt batch_size = self.batch_size self.node_num = node_num = config.node_num self.max_degree = max_degree = config.max_degree self.n_layer = n_layer = config.n_layer # assert batch_size == 1 self.path = loader.path_file self.embedding_path = self.path + loader.embedding_path hidden_stdv = np.sqrt(1. / (hidden_size)) # embedding initial with tf.device(device), tf.name_scope(mode), tf.variable_scope( "gnn", reuse=reuse): # self.W_1 = tf.get_variable( name='W_1', shape=[max_degree, hidden_size], initializer=tf.random_normal_initializer(hidden_stdv), # initializer=tf.zeros_initializer(), trainable=True, ) self.W_2 = tf.get_variable( name='W_2', shape=[hidden_size, hidden_size], initializer=tf.random_normal_initializer(hidden_stdv), # initializer=tf.zeros_initializer(), trainable=True, ) # #------------feed-----------------## # input data: start/end node ids for a batch of edges, plus negative samples self.input_x = input_x = tf.placeholder(tf.int32, (batch_size, )) self.input_y = input_y = tf.placeholder(tf.int32, (batch_size, )) self.negative_sample = negative_sample = tf.placeholder( tf.int32, (batch_size, )) self.input_adj = input_adj = tf.placeholder(tf.float32, (node_num, node_num)) # self.feature_h0 = feature_h0 = tf.ones(shape=(node_num, 100), dtype=tf.float32) * hidden_stdv self.feature_h0 = feature_h0 = tf.placeholder(tf.float32, (node_num, max_degree)) # self.edge_y = edge_y = tf.placeholder(tf.float32, [batch_size, 1]) with tf.device(device), tf.name_scope(mode), tf.variable_scope( "DynGCN", reuse=reuse): self.final_embedding = self.gcn(input_adj, feature_h0) new_embedding_x = tf.nn.embedding_lookup(self.final_embedding, input_x) new_embedding_y = tf.nn.embedding_lookup(self.final_embedding, input_y) new_embedding_n = tf.nn.embedding_lookup(self.final_embedding, negative_sample) result = tf.reduce_mean(new_embedding_x * new_embedding_y, axis=1) result_n = tf.reduce_mean(new_embedding_x * new_embedding_n, axis=1) true_xent = tf.nn.sigmoid_cross_entropy_with_logits( labels=tf.ones_like(result), logits=result) negative_xent = tf.nn.sigmoid_cross_entropy_with_logits( labels=tf.zeros_like(result_n), logits=result_n) loss = tf.reduce_sum(true_xent) + tf.reduce_sum(negative_xent) self.test1 = result self.test2 = result_n # loss = - tf.reduce_mean(tf.sigmoid(result - result_n)) # -------------evaluation-------------- self.label_xy = tf.placeholder(tf.int32, (batch_size, )) self.prediction = tf.sigmoid(result) self.prediction_n = tf.sigmoid(result_n) if mode == 'Valid': self.auc_result, self.auc_opt = tf.metrics.auc( labels=self.label_xy, predictions=self.prediction) else: self.auc_result = self.auc_opt = tf.no_op() # self.f1_score = self.f1_opt = tf.no_op() # # -------------cost --------------- # cost_parameter = 0. 
# score_mean = tf.losses.sigmoid_cross_entropy( # multi_class_labels=self.input_y, # logits=s_pos # ) self.cost = cost = loss # ---------------optimizer---------------# self.no_opt = tf.no_op() self.learning_rate = tf.Variable(config.learning_rate, trainable=False) if mode == 'Train': self.auc_opt = tf.no_op() self.auc_result = tf.no_op() if opt == 'Adam': self.optimizer = tf.train.AdamOptimizer( self.learning_rate).minimize(cost) if opt == 'Momentum': self.optimizer = tf.train.MomentumOptimizer( self.learning_rate, 0.9).minimize(cost) if opt == 'RMSProp': self.optimizer = tf.train.RMSPropOptimizer( self.learning_rate).minimize(cost) if opt == 'Adadelta': self.optimizer = tf.train.AdadeltaOptimizer( self.learning_rate).minimize(cost) # self.optimizer = tf.no_op() else: self.optimizer = tf.no_op() self.cost = tf.no_op()
def build_example(label, param_dict_real): """Build the model with parameter values set in param_dict_real. Args: label: Label of the model (i.e. the filename in the zip). param_dict_real: Parameter dictionary (arguments to the factories make_graph and make_test_inputs) Returns: (tflite_model_binary, report) where tflite_model_binary is the serialized flatbuffer as a string and report is a dictionary with keys `toco_log` (log of toco conversion), `tf_log` (log of tf conversion), `toco` (a string of success status of the conversion), `tf` (a string success status of the conversion). """ np.random.seed(RANDOM_SEED) report = {"toco": report_lib.NOTRUN, "tf": report_lib.FAILED} # Build graph report["tf_log"] = "" report["toco_log"] = "" tf.compat.v1.reset_default_graph() with tf.device("/cpu:0"): try: inputs, outputs = make_graph(param_dict_real) except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError, ValueError): report["tf_log"] += traceback.format_exc() return None, report sess = tf.compat.v1.Session() try: baseline_inputs, baseline_outputs = (make_test_inputs( param_dict_real, sess, inputs, outputs)) except (tf.errors.UnimplementedError, tf.errors.InvalidArgumentError, ValueError): report["tf_log"] += traceback.format_exc() return None, report report["toco"] = report_lib.FAILED report["tf"] = report_lib.SUCCESS # Convert graph to toco input_tensors = [(input_tensor.name.split(":")[0], input_tensor.shape, input_tensor.dtype) for input_tensor in inputs] output_tensors = [ _normalize_output_name(out.name) for out in outputs ] # pylint: disable=g-long-ternary graph_def = freeze_graph( sess, tf.global_variables() + inputs + outputs) if use_frozen_graph else sess.graph_def if "split_tflite_lstm_inputs" in param_dict_real: extra_toco_options.split_tflite_lstm_inputs = param_dict_real[ "split_tflite_lstm_inputs"] tflite_model_binary, toco_log = options.tflite_convert_function( options, graph_def, input_tensors, output_tensors, extra_toco_options=extra_toco_options, test_params=param_dict_real) report["toco"] = (report_lib.SUCCESS if tflite_model_binary is not None else report_lib.FAILED) report["toco_log"] = toco_log if options.save_graphdefs: archive.writestr(label + ".pbtxt", text_format.MessageToString(graph_def), zipfile.ZIP_DEFLATED) if tflite_model_binary: if options.make_edgetpu_tests: # Set proper min max values according to input dtype. baseline_inputs, baseline_outputs = generate_inputs_outputs( tflite_model_binary, min_value=0, max_value=255) archive.writestr(label + ".bin", tflite_model_binary, zipfile.ZIP_DEFLATED) example = { "inputs": baseline_inputs, "outputs": baseline_outputs } example_fp = StringIO() write_examples(example_fp, [example]) archive.writestr(label + ".inputs", example_fp.getvalue(), zipfile.ZIP_DEFLATED) example_fp2 = StringIO() write_test_cases(example_fp2, label + ".bin", [example]) archive.writestr(label + "_tests.txt", example_fp2.getvalue(), zipfile.ZIP_DEFLATED) zip_manifest.append(label + "\n") return tflite_model_binary, report
loadModel = [loadModelAgent1, loadModelAgent2, loadModelAgent3, loadModelAgent4]  # indicate where the saved models are
# loadModel = ""  # use an empty string to start without a saved model

# Parameters for controlling the experiment
isLogging = False  # log the experiment
isPlotting = True  # plot the experiment
plotFrequency = 1000  # plot the plots every X games
createDataset = True  # whether to save the dataset
saveExperimentsIn = "/home/pablo/Documents/Datasets/ChefsHat_ReinforcementLearning/ICPR_Experiments/DQL/Random/1000/NewQPlot"  # directory where the experiment will be saved

metrics = ChefsHatExperimentHandler.runExperiment(
    numGames=numGames, playersAgents=playersAgents,
    experimentDescriptor=experimentDescriptor, isLogging=isLogging,
    isPlotting=isPlotting, plotFrequency=plotFrequency,
    createDataset=createDataset, saveExperimentsIn=saveExperimentsIn,
    loadModel=loadModel, rewardFunction=reward)
print("Metrics:" + str(metrics))

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
# K is used below, so the import must not stay commented out.
from keras import backend as K
K.set_session(sess)
with tf.device('/gpu:0'):
    runModel()
print cat_num_total print len(x) print len(y) # Evaluation # ================================================== prob_max=0.0 prob_min=1.0 total_val=0 total_num=len(x) right_num=0 graph = tf.Graph() total_right_val=0 with graph.as_default(), tf.device('/cpu:0'): output_graph_def = tf.GraphDef() output_graph_path = FLAGS.model_dir with open(output_graph_path, 'rb') as f: output_graph_def.ParseFromString(f.read()) _ = tf.import_graph_def(output_graph_def, name='') config = tf.ConfigProto() config.gpu_options.allow_growth = True sess = tf.Session(config=config) with sess.as_default(): tf.initialize_all_variables().run() input_x = sess.graph.get_tensor_by_name('import/dev_x:0') input_y = sess.graph.get_tensor_by_name('import/dev_y:0') dropout_keep_prob = sess.graph.get_tensor_by_name('import/dropout_keep_prob:0') scores = sess.graph.get_tensor_by_name('import/output/scores:0')
"Ep:", GLOBAL_EP, "| Ep_r: %i" % GLOBAL_MEAN_R[-1], ) GLOBAL_EP += 1 if GLOBAL_MEAN_R[-1] > MAX_R and GLOBAL_MEAN_R[-1] > -350: # saver.save(SESS, 'model_adv/single',global_step=GLOBAL_EP) print("save episode:", GLOBAL_EP) MAX_R = GLOBAL_MEAN_R[-1] break if __name__ == "__main__": SESS = tf.Session() with tf.device("/cpu:0"): OPT_A = tf.train.RMSPropOptimizer(LR_A, name='RMSPropA') OPT_C = tf.train.RMSPropOptimizer(LR_C, name='RMSPropC') GLOBAL_AC = ACNet(GLOBAL_NET_SCOPE) # we only need its params workers = [] # Create worker for i in range(N_WORKERS): i_name = 'W_%i' % i # worker name workers.append(Worker(i_name, GLOBAL_AC)) COORD = tf.train.Coordinator() saver = tf.train.Saver() SESS.run(tf.global_variables_initializer()) worker_threads = [] for worker in workers:
def try_all_gpus(): """Return all available GPUs, or [cpu(),] if no GPU exists.""" num_gpus = len(tf.config.experimental.list_physical_devices('GPU')) devices = [tf.device(f'/GPU:{i}') for i in range(num_gpus)] return devices if devices else [tf.device('/CPU:0')]
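# Usage sketch for try_all_gpus(): the returned entries are tf.device context
# managers, so a tensor can be placed on each available device directly:
for device in try_all_gpus():
    with device:
        x = tf.ones((2, 2))
        print(x.device)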
def train(options, data, n_gpus, tf_save_dir, tf_log_dir, logger, restart_ckpt_file=None): with tf.device('/cpu:0'): global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False) # set up the optimizer lr = options.get('learning_rate', 0.2) opt = tf.train.AdagradOptimizer(learning_rate=lr, initial_accumulator_value=1.0) # calculate the gradients on each GPU tower_grads = [] models = [] train_perplexity = tf.get_variable( 'train_perplexity', [], initializer=tf.constant_initializer(0.0), trainable=False) norm_summaries = [] for k in range(n_gpus): with tf.device('/gpu:%d' % k): with tf.variable_scope('lm', reuse=k > 0): # calculate the loss for one model replica and get # lstm states model = SentenceLanguageModel(options, True) loss = model.total_loss models.append(model) # get gradients grads = opt.compute_gradients( loss * options['unroll_steps'], aggregation_method=\ tf.AggregationMethod.EXPERIMENTAL_TREE, ) tower_grads.append(grads) # keep track of loss across all GPUs train_perplexity += loss print_variable_summary() # calculate the mean of each gradient across all GPUs grads = average_gradients(tower_grads, options['batch_size'], options) grads, norm_summary_ops = clip_grads(grads, options, True, global_step) norm_summaries.extend(norm_summary_ops) # log the training perplexity train_perplexity = tf.exp(train_perplexity / n_gpus) perplexity_summary = tf.summary.scalar('train_perplexity', train_perplexity) # some histogram summaries. all models use the same parameters # so only need to summarize one histogram_summaries = [ tf.summary.histogram('token_embedding', models[0].embedding) ] # tensors of the output from the LSTM layer lstm_out = tf.get_collection('lstm_output_embeddings') histogram_summaries.append( tf.summary.histogram('lstm_embedding_0', lstm_out[0])) if options.get('bidirectional', False): # also have the backward embedding histogram_summaries.append( tf.summary.histogram('lstm_embedding_1', lstm_out[1])) # apply the gradients to create the training operation train_op = opt.apply_gradients(grads, global_step=global_step) # histograms of variables for v in tf.global_variables(): histogram_summaries.append(\ tf.summary.histogram(v.name.replace(":", "_"), v)) # get the gradient updates -- these aren't histograms, but we'll # only update them when histograms are computed histogram_summaries.extend(summary_gradient_updates(grads, opt, lr)) saver = tf.train.Saver(tf.global_variables(), max_to_keep=2) summary_op = tf.summary.merge([perplexity_summary] + norm_summaries) hist_summary_op = tf.summary.merge(histogram_summaries) init = tf.initializers.global_variables() # do the training loop bidirectional = options.get('bidirectional', False) config = tf.ConfigProto(allow_soft_placement=True) config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(init) # load the checkpoint data if needed if restart_ckpt_file is not None: loader = tf.train.Saver() loader.restore(sess, restart_ckpt_file) summary_writer = tf.summary.FileWriter(tf_log_dir, sess.graph) # For each batch: # Get a batch of data from the generator. The generator will # yield batches of size batch_size * n_gpus that are sliced # and fed for each required placeholder. 
batch_size = options['batch_size'] unroll_steps = options['unroll_steps'] epochs = options['n_epochs'] log_interval = options['log_interval'] checkpoint_interval = options['checkpoint_interval'] char_inputs = 'char_cnn' in options logger.info('Start training loop') t1 = time.time() for epoch in range(epochs): data_gen = data.iter_batches(batch_size * n_gpus, unroll_steps) for batch_no, batch in enumerate(data_gen, start=1): # slice the input in the batch for the feed_dict X = batch feed_dict = {} for k in range(n_gpus): model = models[k] start = k * batch_size end = (k + 1) * batch_size feed_dict.update( _get_feed_dict_from_X(X, start, end, model, char_inputs, bidirectional)) if batch_no % checkpoint_interval != 0: ret = sess.run([train_op, summary_op, train_perplexity], feed_dict=feed_dict) else: # also run the histogram summaries ret = sess.run([ train_op, summary_op, train_perplexity, hist_summary_op ], feed_dict=feed_dict) if batch_no % checkpoint_interval == 0: summary_writer.add_summary(ret[3], batch_no) if batch_no % log_interval == 0: # write the summaries to tensorboard and display perplexity summary_writer.add_summary(ret[1], batch_no) logger.info(f'Epoch {epoch} | Batch {batch_no} | ' f'train_perplexity={ret[2]}') logger.info(f'Total time: {time.time() - t1}') if batch_no % checkpoint_interval == 0: # save the model checkpoint_path = os.path.join(tf_save_dir, 'model.ckpt') saver.save(sess, checkpoint_path, global_step=global_step) checkpoint_path = os.path.join(tf_save_dir, f'model_epoch{epoch:02d}.ckpt') saver.save(sess, checkpoint_path, global_step=global_step)
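# _get_feed_dict_from_X() is called in the loop above but not defined in this
# excerpt. A minimal sketch, assuming each batch X is a dict of numpy arrays
# and each model replica exposes matching placeholders (key and attribute
# names are illustrative, patterned after typical biLM training code):
def _get_feed_dict_from_X(X, start, end, model, char_inputs, bidirectional):
    feed_dict = {}
    if char_inputs:
        feed_dict[model.tokens_characters] = X['tokens_characters'][start:end]
    else:
        feed_dict[model.token_ids] = X['token_ids'][start:end]
    feed_dict[model.next_token_id] = X['next_token_id'][start:end]
    if bidirectional:
        if char_inputs:
            feed_dict[model.tokens_characters_reverse] = \
                X['tokens_characters_reverse'][start:end]
        else:
            feed_dict[model.token_ids_reverse] = X['token_ids_reverse'][start:end]
        feed_dict[model.next_token_id_reverse] = X['next_token_id_reverse'][start:end]
    return feed_dict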