def moving_avg(name, x, vals={}, avg_win_size=100):
  # The mutable default dict is intentional: it persists across calls and acts as a
  # per-name cache of recent values, so repeated calls return a running average.
  ut.add_dict_list(vals, name, x)
  return np.mean(vals[name][-avg_win_size:])
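# Illustration only (not part of the original training code): a minimal sketch of how
# moving_avg is used. It assumes ut.add_dict_list(d, k, v) appends v to the list d[k],
# creating it on first use; the shared default `vals` dict is what lets the history
# persist across calls. The demo function name below is hypothetical.
def _moving_avg_demo():
  smoothed = None
  for step, batch_loss in enumerate([1.0, 0.5, 0.25, 0.125]):
    # same name -> same history list inside moving_avg's default dict
    smoothed = moving_avg('demo_loss', batch_loss, avg_win_size = 2)
  # after the loop, smoothed is the mean of the last two values: (0.25 + 0.125) / 2
  return smoothed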
def make_train_model(self):
  with tf.device(self.default_gpu):
    pr = self.pr
    # steps
    self.step = tf.get_variable(
      'global_step', [], trainable = False,
      initializer = tf.constant_initializer(0), dtype = tf.int64)
    self.lr = tf.constant(pr.base_lr)
    # model
    scale = pr.gamma ** tf.floor(cast_float(self.step) / float(pr.step_size))
    self.lr_step = pr.base_lr * scale
    #lr = tf.Print(lr, [lr, lr*1e3, scale])
    opt = shift.make_opt(pr.opt_method, self.lr_step, pr)
    self.inputs = read_data(pr, self.gpus)
    gpu_grads, gpu_losses = {}, {}
    for i, gpu in enumerate(self.gpus):
      with tf.device(gpu):
        reuse = (i > 0)
        ims = self.inputs[i]['ims']
        samples = self.inputs[i]['samples']
        labels = self.inputs[i]['label']
        net = make_net(ims, samples, pr, reuse = reuse, train = self.is_training)
        self.loss = tfu.Loss('loss')
        self.loss.add_loss(shift.slim_losses_with_prefix(None), 'reg')
        self.loss.add_loss_acc(label_loss(net.logits, labels), 'label')
        grads = opt.compute_gradients(self.loss.total_loss())
        ut.add_dict_list(gpu_grads, self.loss.name, grads)
        ut.add_dict_list(gpu_losses, self.loss.name, self.loss)
        if i == 0:
          self.net = net
    (gs, vs) = zip(*tfu.average_grads(gpu_grads['loss']))
    if pr.grad_clip is not None:
      gs, _ = tf.clip_by_global_norm(gs, pr.grad_clip)
    gs = [tfu.print_every(gs[0], 100, ['grad norm:', tf.global_norm(gs)])] + list(gs[1:])
    gvs = zip(gs, vs)
    #for g, v in zip(grads, vs):
    #  if g[0] is not None:
    #    tf.summary.scalar('%s_grad_norm' % v.name, tf.reduce_sum(g[0]**2)**0.5)
    #    tf.summary.scalar('%s_val_norm' % v.name, tf.reduce_sum(v**2)**0.5)
    #self.train_op = opt.apply_gradients(gvs, global_step = self.step)
    bn_ups = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    #self.train_op = tf.group(self.train_op, *bn_ups)
    with tf.control_dependencies(bn_ups):
      self.train_op = opt.apply_gradients(gvs, global_step = self.step)
    self.coord = tf.train.Coordinator()
    self.saver_fast = tf.train.Saver()
    self.saver_slow = tf.train.Saver(max_to_keep = 1000)
    #self.init_op = tf.global_variables_initializer()
    if self.is_training:
      self.init_op = tf.group(
        tf.global_variables_initializer(),
        tf.local_variables_initializer())
      self.sess.run(self.init_op)
      tf.train.start_queue_runners(sess = self.sess, coord = self.coord)
      self.merged_summary = tf.summary.merge_all()
      print 'Tensorboard command:'
      summary_dir = ut.mkdir(pj(pr.summary_dir, ut.simple_timestamp()))
      print 'tensorboard --logdir=%s' % summary_dir
      self.sum_writer = tf.summary.FileWriter(summary_dir, self.sess.graph)
      if self.profile:
        self.profiler = tf.profiler.Profiler(self.sess.graph)
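# Not from the original repo: a NumPy sketch of the learning-rate schedule that
# make_train_model builds in the graph (lr = base_lr * gamma^floor(step / step_size)),
# handy for sanity-checking pr.base_lr / pr.gamma / pr.step_size choices offline.
# The helper name is hypothetical.
def _step_decay_lr(step, base_lr, gamma, step_size):
  return base_lr * gamma ** np.floor(float(step) / step_size)
# e.g. _step_decay_lr(25000, base_lr = 1e-4, gamma = 0.1, step_size = 10000) -> 1e-6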
def make_train_ops(self):
  pr = self.pr
  # steps
  self.step = tf.get_variable(
    'global_step', [], trainable=False,
    initializer=tf.constant_initializer(0), dtype=tf.int64)
  #self.lr = tf.constant(pr.base_lr)
  # model
  scale = pr.gamma**tf.floor(cast_float(self.step) / float(pr.step_size))
  self.lr = pr.base_lr * scale
  opt = make_opt(pr.opt_method, self.lr, pr)
  self.inputs = read_data(pr, self.gpus)
  gpu_grads, gpu_losses = {}, {}
  for i, gpu in enumerate(self.gpus):
    with tf.device(gpu):
      reuse = (i > 0)
      ims = self.inputs[i]['ims']
      all_samples = self.inputs[i]['samples']
      ytids = self.inputs[i]['ytids']
      assert not pr.do_shift
      snd = mix_sounds(all_samples, pr)
      net = make_net(ims, snd.samples, snd.spec, snd.phase, pr,
                     reuse=reuse, train=self.is_training)
      gen_loss, discrim_loss = make_loss(net, snd, pr, reuse=reuse, train=self.is_training)
      if pr.gan_weight <= 0:
        grads = opt.compute_gradients(gen_loss.total_loss())
      else:
        # does not work with baselines such as I3D
        #raise RuntimeError()
        print 'WARNING: DO NOT USE GAN WITH I3D'
        var_list = vars_with_prefix('gen') + vars_with_prefix('im') + vars_with_prefix('sf')
        grads = opt.compute_gradients(gen_loss.total_loss(), var_list=var_list)
      ut.add_dict_list(gpu_grads, 'gen', grads)
      ut.add_dict_list(gpu_losses, 'gen', gen_loss)
      var_list = vars_with_prefix('discrim')
      if pr.gan_weight <= 0:
        grads = []
      else:
        grads = opt.compute_gradients(discrim_loss.total_loss(), var_list=var_list)
      ut.add_dict_list(gpu_grads, 'discrim', grads)
      ut.add_dict_list(gpu_losses, 'discrim', discrim_loss)
      if i == 0:
        self.net = net
        self.show_train = self.make_show_op(net, ims, snd, ytids)
  self.gen_loss = gpu_losses['gen'][0]
  self.discrim_loss = gpu_losses['discrim'][0]
  self.train_ops = {}
  self.loss_names = {}
  self.loss_vals = {}
  ops = []
  for name in ['gen', 'discrim']:
    if pr.gan_weight <= 0. and name == 'discrim':
      op = tf.no_op()
    else:
      (gs, vs) = zip(*mu.average_grads(gpu_grads[name]))
      if pr.grad_clip is not None:
        gs, _ = tf.clip_by_global_norm(gs, pr.grad_clip)
      #gs = [mu.print_every(gs[0], 100, ['%s grad norm:' % name, tf.global_norm(gs)])] + list(gs[1:])
      gvs = zip(gs, vs)
      #bn_ups = slim_ups_with_prefix(name)
      #bn_ups = slim_ups_with_prefix(None)
      if name == 'gen':
        bn_ups = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      else:
        bn_ups = slim_ups_with_prefix('discrim')
      print 'Number of batch norm ups for', name, len(bn_ups)
      with tf.control_dependencies(bn_ups):
        op = opt.apply_gradients(gvs)
      #op = tf.group(opt.apply_gradients(gvs, global_step = (self.step if name == 'discrim' else None)), *bn_ups)
      #op = tf.group(opt.apply_gradients(gvs), *bn_ups)
    ops.append(op)
    self.train_ops[name] = op
    loss = (self.gen_loss if name == 'gen' else self.discrim_loss)
    self.loss_names[name] = loss.get_loss_names()
    self.loss_vals[name] = loss.get_losses()
  self.update_step = self.step.assign(self.step + 1)
  if pr.gan_weight > 0:
    self.train_op = tf.group(*(ops + [self.update_step]))
  else:
    print 'Only using generator, because gan_weight = %.2f' % pr.gan_weight
    self.train_op = tf.group(ops[0], self.update_step)
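# Not the repo's implementation: a sketch of what mu.average_grads is assumed to do
# here, following the standard TF multi-tower pattern. gpu_grads[name] is a list (one
# entry per GPU) of [(grad, var), ...] lists from opt.compute_gradients; the result is
# a single [(avg_grad, var), ...] list that opt.apply_gradients can consume. Dense
# gradients only; the real helper may also handle IndexedSlices.
def _average_grads_sketch(tower_grads):
  avg = []
  for grad_and_vars in zip(*tower_grads):
    grads = [g for g, _ in grad_and_vars if g is not None]
    if len(grads) == 0:
      avg.append((None, grad_and_vars[0][1]))
      continue
    # average the same variable's gradient across towers
    grad = tf.reduce_mean(tf.stack(grads, axis = 0), axis = 0)
    avg.append((grad, grad_and_vars[0][1]))
  return avg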
# out_db[name] = map(convert_im, in_db[name].value)
with h5py.File(out_file, 'w') as out_db:
  vals = {}
  for k in sorted(in_db.keys()):
    if k.startswith('step_'):
      im_names = ['GelSightA_image', 'GelSightB_image',
                  'color_image_KinectA', 'color_image_KinectB']
      value_names = ['depth_image_KinectA', 'depth_image_KinectB', 'timestamp']
      for name in im_names:
        ut.add_dict_list(vals, name, convert_im(in_db[k][name].value))
      for name in value_names:
        ut.add_dict_list(vals, name, in_db[k][name].value)
    else:
      #out_db.create_dataset(k, data = in_db[k].value if hasattr(in_db[k], 'value') else in_db[k])
      if hasattr(in_db[k], 'value'):
        out_db.create_dataset(k, data = in_db[k].value)
      else:
        print 'skipping:', k
  for name in vals:
    out_db.create_dataset(name, data = vals[name])
print 'Size before:'
os.system('du -ch %s' % in_file)
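# Illustration only: reading back one of the aggregated datasets written by the
# conversion above. Dataset names such as 'GelSightA_image' and 'timestamp' come from
# the im_names/value_names lists; each is stacked over all 'step_*' groups in sorted
# key order. The helper name is hypothetical.
def _read_converted(out_file):
  with h5py.File(out_file, 'r') as db:
    ims = db['GelSightA_image'][...]
    ts = db['timestamp'][...]
  return ims, ts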
def make_model(self):
  with tf.device(self.default_gpu):
    pr = self.pr
    # steps
    self.step = tf.get_variable(
      'global_step', [], trainable=False,
      initializer=tf.constant_initializer(0), dtype=tf.int64)
    self.lr = tf.constant(pr.base_lr)
    # model
    opt = make_opt(pr.opt_method, pr.base_lr, pr)
    self.inputs = read_data(pr, self.gpus)
    gpu_grads, gpu_losses = {}, {}
    for i, gpu in enumerate(self.gpus):
      with tf.device(gpu):
        reuse = (i > 0)
        with tf.device('/cpu:0'):
          ims = self.inputs[i]['ims']
          samples_ex = self.inputs[i]['samples']
          assert pr.both_examples
          assert not pr.small_augment
          labels = tf.random_uniform(
            [shape(ims, 0)], 0, 2, dtype=tf.int64, name='labels_sample')
          samples0 = tf.where(tf.equal(labels, 1), samples_ex[:, 1], samples_ex[:, 0])
          samples1 = tf.where(tf.equal(labels, 0), samples_ex[:, 1], samples_ex[:, 0])
          labels1 = 1 - labels
        net0 = make_net(ims, samples0, pr, reuse=reuse, train=self.is_training)
        net1 = make_net(None, samples1, pr, im_net=net0.im_net, reuse=True, train=self.is_training)
        labels = tf.concat([labels, labels1], 0)
        net = ut.Struct(
          logits=tf.concat([net0.logits, net1.logits], 0),
          cam=tf.concat([net0.cam, net1.cam], 0),
          last_conv=tf.concat([net0.last_conv, net1.last_conv], 0))
        loss = mu.Loss('loss')
        loss.add_loss(slim_losses_with_prefix(None), 'reg')
        loss.add_loss_acc(sigmoid_loss(net.logits, labels), 'label')
        grads = opt.compute_gradients(loss.total_loss())
        ut.add_dict_list(gpu_grads, loss.name, grads)
        ut.add_dict_list(gpu_losses, loss.name, loss)
        #self.loss = loss
        if i == 0:
          self.net = net
    self.loss = mu.merge_losses(gpu_losses['loss'])
    for name, val in zip(self.loss.get_loss_names(), self.loss.get_losses()):
      tf.summary.scalar(name, val)
    if not self.is_training:
      #pr_test = pr.copy()
      pr_test = self.pr_test.copy()
      pr_test.augment_ims = False
      print 'pr_test ='
      print pr_test
      self.test_ims, self.test_samples, self.test_ytids = mu.on_cpu(
        lambda: shift_dset.make_db_reader(
          pr_test.test_list, pr_test, pr.test_batch,
          ['im', 'samples', 'ytid'], one_pass=True))
      if pr_test.do_shift:
        self.test_labels = tf.random_uniform(
          [shape(self.test_ims, 0)], 0, 2, dtype=tf.int64)
        self.test_samples = tf.where(
          tf.equal(self.test_labels, 1), self.test_samples[:, 1], self.test_samples[:, 0])
      else:
        self.test_labels = tf.ones(shape(self.test_ims, 0), dtype=tf.int64)
        #self.test_samples = tf.where(tf.equal(self.test_labels, 1), self.test_samples[:, 1], self.test_samples[:, 0])
      print 'sample shape:', shape(self.test_samples)
      self.test_net = make_net(
        self.test_ims, self.test_samples, pr_test, reuse=True, train=self.is_training)
    (gs, vs) = zip(*mu.average_grads(gpu_grads['loss']))
    if pr.grad_clip is not None:
      gs, _ = tf.clip_by_global_norm(gs, pr.grad_clip)
    gs = [mu.print_every(gs[0], 100, ['grad norm:', tf.global_norm(gs)])] + list(gs[1:])
    gvs = zip(gs, vs)
    bn_ups = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    if pr.multipass:
      ops = [opt.apply_gradients(gvs, global_step=self.step)
             for i in xrange(pr.multipass_count)]
      def op_helper(count=[0]):
        op = ops[count[0] % len(ops)]
        count[0] += 1
        return op
      self.train_op = op_helper
    else:
      op = tf.group(opt.apply_gradients(gvs, global_step=self.step), *bn_ups)
      self.train_op = lambda: op
    self.coord = tf.train.Coordinator()
    self.saver = tf.train.Saver()
    self.init_op = tf.group(tf.global_variables_initializer(),
                            tf.local_variables_initializer())
    self.sess.run(self.init_op)
    tf.train.start_queue_runners(sess=self.sess, coord=self.coord)
    self.merged_summary = tf.summary.merge_all()
    print 'Tensorboard command:'
    summary_dir = ut.mkdir(pj(pr.summary_dir, ut.simple_timestamp()))
    print 'tensorboard --logdir=%s' % summary_dir
    self.sum_writer = tf.summary.FileWriter(summary_dir, self.sess.graph)
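# Not from the original file: a sketch of how make_model's callable train_op is meant
# to be used. Because self.train_op is a function (either the multipass op_helper or a
# lambda returning the single grouped op), the training loop asks it for an op each
# iteration and runs it alongside the loss tensors. `model` and `num_steps` are
# placeholders, and model.loss.get_losses() is assumed to return a list of tensors.
def _train_loop_sketch(model, num_steps):
  for _ in xrange(num_steps):
    op = model.train_op()
    _, loss_vals = model.sess.run([op, model.loss.get_losses()])
  return loss_vals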