def train(self):
    '''
    Setup computation graph, run 2 prefetch data threads, and then run the main loop
    '''

    if not os.path.exists(self.H['save_dir']):
        os.makedirs(self.H['save_dir'])

    ckpt_file = self.H['save_dir'] + '/save.ckpt'
    with open(self.H['save_dir'] + '/hypes.json', 'w') as f:
        json.dump(self.H, f, indent=4)

    # use integer division so the grid dimensions are valid tensor shapes
    self.H["grid_width"] = self.H["image_width"] // self.H["region_size"]
    self.H["grid_height"] = self.H["image_height"] // self.H["region_size"]

    x_in = tf.placeholder(tf.float32)
    confs_in = tf.placeholder(tf.float32)
    boxes_in = tf.placeholder(tf.float32)
    q = {}
    enqueue_op = {}
    for phase in ['train', 'test']:
        dtypes = [tf.float32, tf.float32, tf.float32]
        grid_size = self.H['grid_width'] * self.H['grid_height']
        shapes = (
            [self.H['image_height'], self.H['image_width'], 3],
            [grid_size, self.H['rnn_len'], self.H['num_classes']],
            [grid_size, self.H['rnn_len'], 4],
        )
        q[phase] = tf.FIFOQueue(capacity=30, dtypes=dtypes, shapes=shapes)
        enqueue_op[phase] = q[phase].enqueue((x_in, confs_in, boxes_in))

    def make_feed(d):
        return {x_in: d['image'], confs_in: d['confs'], boxes_in: d['boxes'],
                learning_rate: self.H['solver']['learning_rate']}

    def thread_loop(sess, enqueue_op, phase, gen):
        for d in gen:
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))

    (config, loss, accuracy, summary_op, train_op, smooth_op,
     global_step, learning_rate) = self.build(q)

    saver = tf.train.Saver(max_to_keep=None)
    writer = tf.summary.FileWriter(logdir=self.H['save_dir'], flush_secs=10)

    with tf.Session(config=config) as sess:
        tf.train.start_queue_runners(sess=sess)
        for phase in ['train', 'test']:
            # enqueue once manually to avoid thread start delay
            gen = train_utils.load_data_gen(
                self.H, phase, jitter=self.H['solver']['use_jitter'])
            d = next(gen)
            sess.run(enqueue_op[phase], feed_dict=make_feed(d))
            t = threading.Thread(target=thread_loop,
                                 args=(sess, enqueue_op, phase, gen))
            t.daemon = True
            t.start()

        tf.set_random_seed(self.H['solver']['rnd_seed'])
        sess.run(tf.global_variables_initializer())
        writer.add_graph(sess.graph)
        weights_str = self.H['solver']['weights']
        if len(weights_str) > 0:
            print('Restoring from: %s' % weights_str)
            saver.restore(sess, weights_str)
        elif self.H['slim_basename'] == 'MobilenetV1':
            saver.restore(sess, self.H['slim_ckpt'])
        else:
            gvars = [x for x in tf.global_variables()
                     if x.name.startswith(self.H['slim_basename'])
                     and self.H['solver']['opt'] not in x.name]
            gvars = [x for x in gvars
                     if not x.name.startswith(
                         "{}/AuxLogits".format(self.H['slim_basename']))]
            init_fn = slim.assign_from_checkpoint_fn(
                '%s/data/%s' % (os.path.dirname(os.path.realpath(__file__)),
                                self.H['slim_ckpt']),
                gvars,
                ignore_missing_vars=False)
            # init_fn = slim.assign_from_checkpoint_fn(
            #     '%s/data/inception_v1.ckpt' % os.path.dirname(os.path.realpath(__file__)),
            #     [x for x in tf.global_variables()
            #      if x.name.startswith('InceptionV1')
            #      and not self.H['solver']['opt'] in x.name])
            init_fn(sess)

        # train model for N iterations
        start = time.time()
        max_iter = self.H['solver'].get('max_iter', 10000000)
        for i in range(max_iter):
            display_iter = self.H['logging']['display_iter']
            adjusted_lr = (self.H['solver']['learning_rate'] *
                           0.5 ** max(0, (i / self.H['solver']['learning_rate_step']) - 2))
            lr_feed = {learning_rate: adjusted_lr}

            if i % display_iter != 0:
                # train network
                batch_loss_train, _ = sess.run([loss['train'], train_op],
                                               feed_dict=lr_feed)
            else:
                # test network every N iterations; log additional info
                if i > 0:
                    dt = (time.time() - start) / (self.H['batch_size'] * display_iter)
                start = time.time()
                (train_loss, test_accuracy, summary_str, _, _) = sess.run(
                    [loss['train'], accuracy['test'], summary_op, train_op, smooth_op],
                    feed_dict=lr_feed)
                writer.add_summary(summary_str, global_step=global_step.eval())
                print_str = ', '.join([
                    'Step: %d',
                    'lr: %f',
                    'Train Loss: %.2f',
                    'Softmax Test Accuracy: %.1f%%',
                    'Time/image (ms): %.1f',
                ])
                print(print_str % (i, adjusted_lr, train_loss,
                                   test_accuracy * 100,
                                   dt * 1000 if i > 0 else 0))

            if (global_step.eval() % self.H['logging']['save_iter'] == 0
                    or global_step.eval() == max_iter - 1):
                saver.save(sess, ckpt_file, global_step=global_step)
hidden_size = int(opts.hidden_size)
depth = int(opts.depth)
core_size = int(opts.core_size)
MAX_NCAND = int(opts.cand_size)

#gpu_options = tf.GPUOptions(allow_growth=True)
#session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
session = tf.Session()

_input_atom = tf.placeholder(tf.float32, [None, None, adim])
_input_bond = tf.placeholder(tf.float32, [None, None, bdim])
_atom_graph = tf.placeholder(tf.int32, [None, None, max_nb, 2])
_bond_graph = tf.placeholder(tf.int32, [None, None, max_nb, 2])
_num_nbs = tf.placeholder(tf.int32, [None, None])
_src_holder = [_input_atom, _input_bond, _atom_graph, _bond_graph, _num_nbs]

q = tf.FIFOQueue(100, [tf.float32, tf.float32, tf.int32, tf.int32, tf.int32])
enqueue = q.enqueue(_src_holder)

input_atom, input_bond, atom_graph, bond_graph, num_nbs = q.dequeue()
# restore the static shape information lost by dequeuing from an unshaped queue
input_atom.set_shape([None, None, adim])
input_bond.set_shape([None, None, bdim])
atom_graph.set_shape([None, None, max_nb, 2])
bond_graph.set_shape([None, None, max_nb, 2])
num_nbs.set_shape([None, None])
graph_inputs = (input_atom, input_bond, atom_graph, bond_graph, num_nbs)

with tf.variable_scope("encoder"):
    _, fp = rcnn_wl_last(graph_inputs, hidden_size=hidden_size, depth=depth)

reactant = fp[0:1, :]
candidates = fp[1:, :]
candidates = candidates - reactant
def __init__(self, config):
    self.rng = np.random.RandomState(config.random_seed)
    self.root = config.data_path
    self.root_val = config.valid_dataset_dir

    # read data generation arguments
    self.args = {}
    with open(os.path.join(self.root, 'args.txt'), 'r') as f:
        while True:
            line = f.readline()
            if not line:
                break
            arg, arg_value = line[:-1].split(': ')
            self.args[arg] = arg_value

    self.is_3d = config.is_3d
    if 'ae' in config.arch:
        def sortf(x):
            nf = int(self.args['num_frames'])
            n = os.path.basename(x)[:-4].split('_')
            return int(n[0]) * nf + int(n[1])

        self.paths = sorted(glob("{}/{}/*".format(self.root, config.data_type[0])),
                            key=sortf)
        # num_path = len(self.paths)
        # num_train = int(num_path*0.95)
        # self.test_paths = self.paths[num_train:]
        # self.paths = self.paths[:num_train]
    else:
        self.paths = sorted(
            glob("{}/{}/*".format(self.root, config.data_type[0])))

    self.valid_paths = glob("{}/*".format(self.root_val))
    self.num_samples = len(self.paths)
    self.num_samples_validation = len(self.valid_paths)
    assert (self.num_samples > 0)

    self.batch_size = config.batch_size
    self.epochs_per_step = self.batch_size / float(self.num_samples)  # per epoch
    self.data_type = config.data_type

    # the number of channels depends on the data type
    if self.data_type == 'velocity':
        depth = 3 if self.is_3d else 2
    else:
        depth = 1

    self.res_x = config.res_x
    self.res_y = config.res_y
    self.res_z = config.res_z
    self.depth = depth
    self.c_num = int(self.args['num_param'])

    if self.is_3d:
        feature_dim = [self.res_z, self.res_y, self.res_x, self.depth]
        geom_dim = [self.res_z, self.res_y, self.res_x, 3]
    else:
        feature_dim = [self.res_y, self.res_x, self.depth]

    if 'ae' in config.arch:
        self.dof = int(self.args['num_dof'])
        label_dim = [self.dof, int(self.args['num_frames'])]
    else:
        label_dim = [self.c_num]

    if self.is_3d:
        min_after_dequeue = 500
    else:
        min_after_dequeue = 5000
    capacity = min_after_dequeue + 3 * self.batch_size

    #self.q = tf.FIFOQueue(capacity, [tf.float32, tf.float32], [feature_dim, label_dim])
    self.q = tf.FIFOQueue(
        capacity,
        [tf.float32, tf.float32, tf.float32, tf.float32, tf.float32, tf.float32],
        [feature_dim, label_dim, geom_dim, feature_dim, label_dim, geom_dim])
    self.x = tf.placeholder(dtype=tf.float32, shape=feature_dim)
    self.y = tf.placeholder(dtype=tf.float32, shape=label_dim)
    self.geom = tf.placeholder(dtype=tf.float32, shape=geom_dim)

    # self.q_val = tf.FIFOQueue(capacity, [tf.float32, tf.float32, tf.float32],
    #                           [feature_dim, label_dim, geom_dim],
    #                           name='fifo_queue_val')
    self.x_val = tf.placeholder(dtype=tf.float32, shape=feature_dim,
                                name='x_val_placeholder')
    self.y_val = tf.placeholder(dtype=tf.float32, shape=label_dim,
                                name='y_val_placeholder')
    self.geom_val = tf.placeholder(dtype=tf.float32, shape=geom_dim,
                                   name='geom_val_placeholder')
    # self.enqueue_val = self.q_val.enqueue([self.x_val, self.y_val, self.geom_val],
    #                                       name='enqueue_val_operation')
    # self.num_threads_val = 1  # TODO: this is hardcoded for the time being

    self.enqueue = self.q.enqueue(
        [self.x, self.y, self.geom, self.x_val, self.y_val, self.geom_val])
    self.num_threads = np.amin(
        [config.num_worker, multiprocessing.cpu_count(), self.batch_size])

    r = np.loadtxt(os.path.join(self.root, self.data_type[0] + '_range.txt'))
    self.x_range = max(abs(r[0]), abs(r[1]))
    self.y_range = []
    self.y_num = []

    if 'ae' in config.arch:
        for i in range(self.c_num):
            p_name = self.args['p%d' % i]
            p_min = float(self.args['min_{}'.format(p_name)])
            p_max = float(self.args['max_{}'.format(p_name)])
            p_num = int(self.args['num_{}'.format(p_name)])
            self.y_num.append(p_num)
        for i in range(label_dim[0]):
            self.y_range.append([-1, 1])
    else:
        print(self.c_num)
        for i in range(self.c_num):
            p_name = self.args['p%d' % i]
            p_min = float(self.args['min_{}'.format(p_name)])
            p_max = float(self.args['max_{}'.format(p_name)])
            p_num = int(self.args['num_{}'.format(p_name)])
            self.y_range.append([p_min, p_max])
            self.y_num.append(p_num)

    print("initial_range", self.y_range)
def prefetch_input_data(reader,
                        file_pattern,
                        is_training,
                        batch_size,
                        values_per_shard,
                        input_queue_capacity_factor=16,
                        num_reader_threads=1,
                        shard_queue_name="filename_queue",
                        value_queue_name="input_queue"):
    """Prefetches string values from disk into an input queue.

    In training the capacity of the queue is important because a larger queue
    means better mixing of training examples between shards. The minimum
    number of values kept in the queue is
    values_per_shard * input_queue_capacity_factor, where
    input_queue_capacity_factor should be chosen to trade off better mixing
    with memory usage.

    Args:
        reader: Instance of tf.ReaderBase.
        file_pattern: Comma-separated list of file patterns
            (e.g. /tmp/train_data-?????-of-00100).
        is_training: Boolean; whether prefetching for training or eval.
        batch_size: Model batch size used to determine queue capacity.
        values_per_shard: Approximate number of values per shard.
        input_queue_capacity_factor: Minimum number of values to keep in the
            queue in multiples of values_per_shard. See comments above.
        num_reader_threads: Number of reader threads to fill the queue.
        shard_queue_name: Name for the shards filename queue.
        value_queue_name: Name for the values input queue.

    Returns:
        A Queue containing prefetched string values.
    """
    data_files = []
    for pattern in file_pattern.split(","):
        data_files.extend(tf.gfile.Glob(pattern))
    if not data_files:
        tf.logging.fatal("Found no input files matching %s", file_pattern)
    else:
        tf.logging.info("Prefetching values from %d files matching %s",
                        len(data_files), file_pattern)

    if is_training:
        print(" is_training == True : RandomShuffleQueue")
        filename_queue = tf.train.string_input_producer(
            data_files, shuffle=True, capacity=16, name=shard_queue_name)
        min_queue_examples = values_per_shard * input_queue_capacity_factor
        capacity = min_queue_examples + 100 * batch_size
        values_queue = tf.RandomShuffleQueue(
            capacity=capacity,
            min_after_dequeue=min_queue_examples,
            dtypes=[tf.string],
            name="random_" + value_queue_name)
    else:
        print(" is_training == False : FIFOQueue")
        filename_queue = tf.train.string_input_producer(
            data_files, shuffle=False, capacity=1, name=shard_queue_name)
        capacity = values_per_shard + 3 * batch_size
        values_queue = tf.FIFOQueue(
            capacity=capacity,
            dtypes=[tf.string],
            name="fifo_" + value_queue_name)

    enqueue_ops = []
    for _ in range(num_reader_threads):
        _, value = reader.read(filename_queue)
        enqueue_ops.append(values_queue.enqueue([value]))
    tf.train.queue_runner.add_queue_runner(
        tf.train.queue_runner.QueueRunner(values_queue, enqueue_ops))
    tf.summary.scalar(
        "queue/%s/fraction_of_%d_full" % (values_queue.name, capacity),
        tf.cast(values_queue.size(), tf.float32) * (1. / capacity))

    return values_queue
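# A hedged usage sketch for the helper above: pairing it with a
# tf.TFRecordReader and pulling one serialized example from the returned
# queue. The file pattern and the values_per_shard figure are illustrative
# placeholders, not values from the original codebase.
reader = tf.TFRecordReader()
values_queue = prefetch_input_data(
    reader,
    file_pattern="/tmp/train_data-?????-of-00100",  # placeholder pattern
    is_training=True,
    batch_size=32,
    values_per_shard=2300)  # assumed approximate shard size

# Each dequeue yields one serialized tf.train.Example string,
# ready for tf.parse_single_example(...).
serialized_example = values_queue.dequeue()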
for path in [ckpt_path, hypes_path, summary_path]:
    if not os.path.exists(path):
        os.makedirs(path)


def enqueue_thread(sess, data_gen, coord, phase, enqueue_op):
    while not coord.should_stop():
        img, label = next(data_gen)
        sess.run(enqueue_op, feed_dict={x_in[phase]: img, y_in[phase]: label})


x_in, y_in, queues, enqueue_op = {}, {}, {}, {}
shape = ((im_width, im_height, num_channel), (im_width, im_height))
for phase in ['train', 'validate']:
    x_in[phase] = tf.placeholder(dtype=tf.float32)
    y_in[phase] = tf.placeholder(dtype=tf.float32)
    queues[phase] = tf.FIFOQueue(capacity=queue_size,
                                 shapes=shape,
                                 dtypes=(tf.float32, tf.float32))
    enqueue_op[phase] = queues[phase].enqueue_many([x_in[phase], y_in[phase]])

loss, accuracy, train_op, summary_op, learning_rate, global_step = \
    build(queues, H)

data_gen = {}
for phase in ['train', 'validate']:
    is_train = {'train': True, 'validate': False}[phase]
    data_gen[phase] = train_utils.input_data(crop_per_img=1,
                                             class_id=class_type,
                                             reflection=True,
                                             rotation=360,
                                             train=is_train,
                                             crop_size=im_width)
    # Run the generator once to make sure the data is loaded into the memory.
    # This will take a few minutes.
    next(data_gen[phase])

sys.stdout.write('{} training images: {}\n'.format(
#!/usr/bin/env python
# -*- coding:utf-8 -*-
__author__ = 'fjl'

import tensorflow as tf

# 1. Define the queue first
Q = tf.FIFOQueue(3, tf.float32)
en_qmany = Q.enqueue_many([[0.1, 0.2, 0.3], ])

# 2. Dequeue, process the data, and enqueue it again
out_q = Q.dequeue()
data = out_q + 1
en_q = Q.enqueue(data)

with tf.Session() as sess:
    sess.run(en_qmany)
    # process the data
    for i in range(100):
        sess.run(en_q)
    for i in range(Q.size().eval()):
        print(sess.run(Q.dequeue()))
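# The manual enqueue loop above can also be driven by a tf.train.QueueRunner,
# which runs the enqueue op on background threads. A minimal sketch of a
# producer variant (random values rather than the counter above); when the
# coordinator requests a stop, the runner also cancels pending enqueues.
import tensorflow as tf

Q = tf.FIFOQueue(3, tf.float32)
# Each run of this op pushes one random scalar into the queue.
en_q = Q.enqueue(tf.random_normal([]))
qr = tf.train.QueueRunner(Q, [en_q] * 2)  # two producer threads
out_q = Q.dequeue()

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = qr.create_threads(sess, coord=coord, start=True)
    for _ in range(5):
        print(sess.run(out_q))
    coord.request_stop()
    coord.join(threads)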
def build_input(dataset, data_path, batch_size, mode):
    """Build CIFAR image and labels.

    Args:
        dataset: Either 'cifar10' or 'cifar100'.
        data_path: Filename for data.
        batch_size: Input batch size.
        mode: Either 'train' or 'eval'.

    Returns:
        images: Batches of images. [batch_size, image_size, image_size, 3]
        labels: Batches of labels. [batch_size, num_classes]

    Raises:
        ValueError: when the specified dataset is not supported.
    """
    image_size = 32
    if dataset == 'cifar10':
        label_bytes = 1
        label_offset = 0
        num_classes = 10
    elif dataset == 'cifar100':
        label_bytes = 1
        label_offset = 1
        num_classes = 100
    else:
        raise ValueError('Not supported dataset %s' % dataset)

    depth = 3
    image_bytes = image_size * image_size * depth
    record_bytes = label_bytes + label_offset + image_bytes

    data_files = tf.gfile.Glob(data_path)
    file_queue = tf.train.string_input_producer(data_files, shuffle=True)
    # Read examples from files in the filename queue.
    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    _, value = reader.read(file_queue)

    # Convert these examples to dense labels and processed images.
    record = tf.reshape(tf.decode_raw(value, tf.uint8), [record_bytes])
    label = tf.cast(tf.slice(record, [label_offset], [label_bytes]), tf.int32)
    # Convert from string to [depth * height * width] to [depth, height, width].
    depth_major = tf.reshape(
        tf.slice(record, [label_offset + label_bytes], [image_bytes]),
        [depth, image_size, image_size])
    # Convert from [depth, height, width] to [height, width, depth].
    image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32)

    if mode == 'train':
        image = tf.image.resize_image_with_crop_or_pad(
            image, image_size + 4, image_size + 4)
        image = tf.random_crop(image, [image_size, image_size, 3])
        image = tf.image.random_flip_left_right(image)
        # Brightness/saturation/contrast provides small gains .2%~.5% on cifar.
        # image = tf.image.random_brightness(image, max_delta=63. / 255.)
        # image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
        # image = tf.image.random_contrast(image, lower=0.2, upper=1.8)
        image = tf.image.per_image_standardization(image)

        example_queue = tf.RandomShuffleQueue(
            capacity=16 * batch_size,
            min_after_dequeue=8 * batch_size,
            dtypes=[tf.float32, tf.int32],
            shapes=[[image_size, image_size, depth], [1]])
        num_threads = 16
    else:
        image = tf.image.resize_image_with_crop_or_pad(
            image, image_size, image_size)
        image = tf.image.per_image_standardization(image)

        example_queue = tf.FIFOQueue(
            3 * batch_size,
            dtypes=[tf.float32, tf.int32],
            shapes=[[image_size, image_size, depth], [1]])
        num_threads = 1

    example_enqueue_op = example_queue.enqueue([image, label])
    tf.train.add_queue_runner(
        tf.train.queue_runner.QueueRunner(example_queue,
                                          [example_enqueue_op] * num_threads))

    # Read 'batch' labels + images from the example queue.
    images, labels = example_queue.dequeue_many(batch_size)
    labels = tf.reshape(labels, [batch_size, 1])
    indices = tf.reshape(tf.range(0, batch_size, 1), [batch_size, 1])
    labels = tf.sparse_to_dense(
        tf.concat(values=[indices, labels], axis=1),
        [batch_size, num_classes], 1.0, 0.0)

    assert len(images.get_shape()) == 4
    assert images.get_shape()[0] == batch_size
    assert images.get_shape()[-1] == 3
    assert len(labels.get_shape()) == 2
    assert labels.get_shape()[0] == batch_size
    assert labels.get_shape()[1] == num_classes

    # Display the training images in the visualizer.
    tf.summary.image('images', images)
    return images, labels
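# To exercise build_input, the queue runners it registers must be started
# before any batch is pulled. A minimal sketch; the data path is a
# placeholder, not a path from the original codebase.
images, labels = build_input('cifar10', '/tmp/cifar10/data_batch*', 128, 'train')

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # One batch: images [128, 32, 32, 3], labels [128, 10] (one-hot).
    img_batch, label_batch = sess.run([images, labels])
    coord.request_stop()
    coord.join(threads)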
def build_input(data, batch_size, dataset, train):
    """Build CIFAR image and labels.

    Args:
        data: Tuple of (images, labels) arrays holding the cifar data.
        batch_size: Input batch size.
        dataset: Either 'cifar10' or 'cifar100'.
        train: True if we are training and false if we are testing.

    Returns:
        images: Batches of images of size
            [batch_size, image_size, image_size, 3].
        labels: Batches of labels of size [batch_size, num_classes].

    Raises:
        ValueError: When the specified dataset is not supported.
    """
    images_constant = tf.constant(data[0])
    labels_constant = tf.constant(data[1])
    image_size = 32
    depth = 3
    num_classes = 10 if dataset == "cifar10" else 100
    image, label = tf.train.slice_input_producer(
        [images_constant, labels_constant], capacity=16 * batch_size)

    if train:
        image = tf.image.resize_image_with_crop_or_pad(
            image, image_size + 4, image_size + 4)
        image = tf.random_crop(image, [image_size, image_size, 3])
        image = tf.image.random_flip_left_right(image)
        image = tf.image.per_image_standardization(image)

        example_queue = tf.RandomShuffleQueue(
            capacity=16 * batch_size,
            min_after_dequeue=8 * batch_size,
            dtypes=[tf.float32, tf.int32],
            shapes=[[image_size, image_size, depth], [1]])
        num_threads = 16
    else:
        image = tf.image.resize_image_with_crop_or_pad(
            image, image_size, image_size)
        image = tf.image.per_image_standardization(image)

        example_queue = tf.FIFOQueue(
            3 * batch_size,
            dtypes=[tf.float32, tf.int32],
            shapes=[[image_size, image_size, depth], [1]])
        num_threads = 1

    example_enqueue_op = example_queue.enqueue([image, label])
    tf.train.add_queue_runner(tf.train.queue_runner.QueueRunner(
        example_queue, [example_enqueue_op] * num_threads))

    # Read "batch" labels + images from the example queue.
    images, labels = example_queue.dequeue_many(batch_size)
    labels = tf.reshape(labels, [batch_size, 1])
    indices = tf.reshape(tf.range(0, batch_size, 1), [batch_size, 1])
    labels = tf.sparse_to_dense(
        tf.concat([indices, labels], 1),
        [batch_size, num_classes], 1.0, 0.0)

    assert len(images.get_shape()) == 4
    assert images.get_shape()[0] == batch_size
    assert images.get_shape()[-1] == 3
    assert len(labels.get_shape()) == 2
    assert labels.get_shape()[0] == batch_size
    assert labels.get_shape()[1] == num_classes

    if not train:
        tf.summary.image("images", images)
    return images, labels
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import dtypes

# specify dataset path
path_prefix = '/mnt/terabyte/datasets/imagenet/caffe/ilsvrc12_'
path_postfix = '_lmdb'
supported_modes = ['train', 'val']
mode = supported_modes[0]
full_path = path_prefix + mode + path_postfix

# specify how many datums to read at once
batch_length = 11

# set numpy array print options
np.set_printoptions(threshold=21)

reader = tf.LMDBReader(name='reader')
keys_queue = tf.FIFOQueue(capacity=32, dtypes=[dtypes.string], shapes=())

# scenario 1 (buggy)
keys1, values1 = reader.read_up_to(keys_queue, batch_length)
jpg_buffer1 = tf.decode_raw(values1, out_type=tf.uint8)

# scenario 2 (good)
keys2, values2 = reader.read_up_to(keys_queue, 11)
jpg_buffer2 = tf.decode_raw(values2, out_type=tf.uint8)

with tf.Session() as sess:
    keys_queue.enqueue([full_path]).run()
    keys_queue.close().run()
    buffer2 = sess.run(jpg_buffer2)
    print(buffer2.shape)
def create_done_queue(i):
    """Queue used to signal death for i'th worker."""
    return tf.FIFOQueue(1, tf.int32, shared_name="done_queue" + str(i))
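# create_done_queue is typically part of the TF 1.x distributed-shutdown
# pattern: worker i enqueues a token into its done queue when it finishes,
# and the supervising task dequeues one token per worker before exiting.
# A hedged sketch; num_workers, FLAGS.task_index, the sessions, and
# server.target are assumed context, not shown in the snippet above.

# On worker i, after the last training step:
done_op = create_done_queue(FLAGS.task_index).enqueue(1)
sess.run(done_op)

# On the supervising task, block until every worker has signalled:
done_queues = [create_done_queue(i) for i in range(num_workers)]
with tf.Session(server.target) as sess:
    for q in done_queues:
        sess.run(q.dequeue())  # returns once the matching worker enqueues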
def decoding_queue(logits_queue, num_threads=6):
    """Build the decoding queue graph.

    Args:
        logits_queue: the logits queue.
        num_threads: number of threads.

    Return:
        decode_predict: (decoded_sparse_tensor, decoded_probability)
            decoded_sparse_tensor is a [sparse tensor].
        decode_prob: a [batch_size] array containing the probability of each path.
        decode_fname: a [batch_size] array containing the file names.
        decode_idx: a [batch_size] array containing the indices.
        decoded_queue.size(): the number of instances in the queue.
    """
    q_logits, q_name, q_index, seq_length = logits_queue.dequeue()
    batch_n = q_logits.get_shape().as_list()[0]
    if FLAGS.extension == 'fastq':
        prob = path_prob(q_logits)
    else:
        # We just need to have the right type, because of the queues
        prob = tf.constant([0.0] * batch_n)

    if FLAGS.beam == 0:
        decode_decoded, decode_log_prob = tf.nn.ctc_greedy_decoder(
            tf.transpose(q_logits, perm=[1, 0, 2]),
            seq_length,
            merge_repeated=True)
    else:
        # There will be a second merge operation after the decoding process
        # if merge_repeated is set to True for the beam search decoder.
        # See https://github.com/tensorflow/tensorflow/issues/9550
        decode_decoded, decode_log_prob = tf.nn.ctc_beam_search_decoder(
            tf.transpose(q_logits, perm=[1, 0, 2]),
            seq_length,
            merge_repeated=False,
            beam_width=FLAGS.beam,
            top_paths=1)

    decoded_queue = tf.FIFOQueue(
        capacity=2 * num_threads,
        dtypes=[tf.int64 for _ in decode_decoded] * 3 +
               [tf.float32, tf.float32, tf.string, tf.int32],
    )
    # Flatten each SparseTensor into (indices, values, dense_shape) triples
    # so they can be passed through the queue.
    ops = []
    for x in decode_decoded:
        ops.append(x.indices)
        ops.append(x.values)
        ops.append(x.dense_shape)
    decode_enqueue = decoded_queue.enqueue(
        tuple(ops + [decode_log_prob, prob, q_name, q_index]))

    decode_dequeue = decoded_queue.dequeue()
    decode_prob, decode_fname, decode_idx = decode_dequeue[-3:]
    decode_dequeue = decode_dequeue[:-3]
    decode_predict = [[], decode_dequeue[-1]]
    # Rebuild the SparseTensors from their flattened components.
    for i in range(0, len(decode_dequeue) - 1, 3):
        decode_predict[0].append(
            tf.SparseTensor(
                indices=decode_dequeue[i],
                values=decode_dequeue[i + 1],
                dense_shape=decode_dequeue[i + 2],
            ))

    decode_qr = tf.train.QueueRunner(decoded_queue,
                                     [decode_enqueue] * num_threads)
    tf.train.add_queue_runner(decode_qr)
    return (decode_predict, decode_prob, decode_fname, decode_idx,
            decoded_queue.size())
with tf.device(config.device):
    input_state_ph = tf.placeholder(tf.float32, [config.batch_size, 84, 84, 4],
                                    name="input_state_ph")
    # this should be:
    # input_state_placeholder = tf.placeholder("float", [None, 84, 84, 4],
    #                                          name="state_placeholder")
    action_ph = tf.placeholder(tf.int64, [config.batch_size], name="Action_ph")
    Y_ph = tf.placeholder(tf.float32, [config.batch_size], name="Y_ph")
    next_Y_ph = tf.placeholder(tf.float32, [config.batch_size, action_num],
                               name="next_Y_ph")
    reward_ph = tf.placeholder(tf.float32, [config.batch_size], name="reward_ph")

    ph_lst = [input_state_ph, action_ph, Y_ph, next_Y_ph, reward_ph]

    q = tf.FIFOQueue(2,
                     [ph.dtype for ph in ph_lst],
                     [ph.get_shape() for ph in ph_lst])
    enqueue_op = q.enqueue(ph_lst)
    input_state, action, Y, next_Y, reward = q.dequeue()

    # so that i can feed inputs with different batch sizes.
    input_state = tf.placeholder_with_default(
        input_state,
        shape=tf.TensorShape([None]).concatenate(input_state.get_shape()[1:]))
    action = tf.placeholder_with_default(action, shape=[None])

    next_input_state_ph = tf.placeholder(tf.float32,
                                         [config.batch_size, 84, 84, 4],
                                         name="next_input_state_placeholder")

    with tf.variable_scope("DQN"):
        Q, R, predicted_next_Q = createQNetwork(input_state, action, config, "DQN")
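# The placeholder_with_default wrappers above let the same Q network run
# either from the queue (training, fixed batch size) or from directly fed
# arrays of any batch size (e.g. action selection). A hedged sketch;
# some_states and some_actions are assumed numpy arrays, and train_op is
# assumed to be defined elsewhere.

# Training step: inputs come from the FIFOQueue, nothing to feed.
# sess.run(train_op)

# Acting/evaluation: override the queue with an arbitrary-size batch.
q_values = sess.run(Q, feed_dict={input_state: some_states,   # [n, 84, 84, 4]
                                  action: some_actions})      # [n]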
def find_prf_gpu(idxPrc, vecMdlXpos, vecMdlYpos, vecMdlSd, aryFunc,  #noqa
                 aryPrfTc, queOut):
    """
    Find best fitting pRF model for voxel time course, using the GPU.

    Parameters
    ----------
    idxPrc : int
        Process ID of the process calling this function (for CPU
        multi-threading). In GPU version, this parameter is 0 (just one
        thread on CPU).
    vecMdlXpos : np.array
        1D array with pRF model x positions.
    vecMdlYpos : np.array
        1D array with pRF model y positions.
    vecMdlSd : np.array
        1D array with pRF model sizes (SD of Gaussian).
    aryFunc : np.array
        2D array with functional MRI data, with shape aryFunc[time, voxel].
    aryPrfTc : np.array
        Array with pRF model time courses, with shape
        aryPrfTc[x-pos, y-pos, SD, time].
    queOut : multiprocessing.queues.Queue
        Queue to put the results on.

    Returns
    -------
    lstOut : list
        List containing the following objects:
        idxPrc : int
            Process ID of the process calling this function (for CPU
            multi-threading). In GPU version, this parameter is 0.
        vecBstXpos : np.array
            1D array with best fitting x-position for each voxel, with shape
            vecBstXpos[voxel].
        vecBstYpos : np.array
            1D array with best fitting y-position for each voxel, with shape
            vecBstYpos[voxel].
        vecBstSd : np.array
            1D array with best fitting pRF size for each voxel, with shape
            vecBstSd[voxel].
        vecBstR2 : np.array
            1D array with R2 value of 'winning' pRF model for each voxel,
            with shape vecBstR2[voxel].

    Notes
    -----
    Uses a queue that runs in a separate thread to put model time courses on
    the computational graph. The list with results is not returned directly,
    but placed on the queue. This version performs the model finding on the
    GPU, using tensorflow.
    """
    # -------------------------------------------------------------------------
    # *** Queue-feeding-function that will run in extra thread
    def funcPlcIn():
        """Place data on queue."""
        # Iteration counter:
        idxCnt = 0
        # Stop if coordinator says stop:
        while not (objCoord.should_stop()):
            # Feed example to Tensorflow placeholder
            # aryTmp02 = np.copy(lstPrfTc[idxCnt])
            aryTmp02 = lstPrfTc[idxCnt]
            dicIn = {objPlcHld01: aryTmp02}
            # Push to the queue:
            objSess.run(objEnQ, feed_dict=dicIn)
            idxCnt += 1
            # Stop if all data has been put on the queue:
            if idxCnt == varNumMdls:
                break

    # -------------------------------------------------------------------------
    # *** Prepare pRF model time courses for graph

    print('------Prepare pRF model time courses for graph')

    # Number of modelled x-positions in the visual space:
    varNumX = aryPrfTc.shape[0]
    # Number of modelled y-positions in the visual space:
    varNumY = aryPrfTc.shape[1]
    # Number of modelled pRF sizes:
    varNumPrfSizes = aryPrfTc.shape[2]

    # Reshape pRF model time courses:
    aryPrfTc = np.reshape(aryPrfTc,
                          ((aryPrfTc.shape[0]
                            * aryPrfTc.shape[1]
                            * aryPrfTc.shape[2]),
                           aryPrfTc.shape[3]))

    # Change type to float 32:
    aryPrfTc = aryPrfTc.astype(np.float32)

    # The pRF model is fitted only if variance along time dimension is not
    # zero. Get variance along time dimension:
    vecVarPrfTc = np.var(aryPrfTc, axis=1)

    # Zero with float32 precision for comparison:
    varZero32 = np.array(([0.0])).astype(np.float32)[0]

    # Boolean array for models with variance greater than zero:
    vecLgcVar = np.greater(vecVarPrfTc, varZero32)

    # Original total number of pRF time course models (before removing models
    # with zero variance):
    varNumMdlsTtl = aryPrfTc.shape[0]

    # Take models with zero variance out of the array:
    aryPrfTc = aryPrfTc[vecLgcVar, :]

    # Add extra dimension for constant term:
    aryPrfTc = np.reshape(aryPrfTc, (aryPrfTc.shape[0], aryPrfTc.shape[1], 1))

    # Add constant term (ones):
    aryPrfTc = np.concatenate(
        (aryPrfTc, np.ones(aryPrfTc.shape).astype(np.float32)), axis=2)

    # Size of pRF time courses in MB:
    varSzePrf = np.divide(float(aryPrfTc.nbytes), 1000000.0)
    print(('---------Size of pRF time courses: '
           + str(np.around(varSzePrf))
           + ' MB'))

    # Put pRF model time courses into list:
    lstPrfTc = [None] * aryPrfTc.shape[0]
    for idxMdl in range(int(aryPrfTc.shape[0])):
        lstPrfTc[idxMdl] = aryPrfTc[idxMdl, :, :]
    del(aryPrfTc)

    # Total number of pRF models to fit:
    varNumMdls = len(lstPrfTc)

    # -------------------------------------------------------------------------
    # *** Prepare functional data for graph

    print('------Prepare functional data for graph')

    # Number of voxels to be fitted:
    varNumVox = aryFunc.shape[1]
    # Number of volumes:
    # varNumVol = aryFunc.shape[0]

    # Change type to float 32:
    aryFunc = aryFunc.astype(np.float32)

    # We cannot commit the entire functional data to GPU memory, we need to
    # create chunks. Establish the limit (maximum size) of one chunk (in MB):
    varSzeMax = 50.0

    # Size of functional data in MB:
    varSzeFunc = np.divide(float(aryFunc.nbytes), 1000000.0)
    print(('---------Size of functional data: '
           + str(np.around(varSzeFunc))
           + ' MB'))

    # Number of chunks to create:
    varNumChnk = int(np.ceil(np.divide(varSzeFunc, varSzeMax)))
    print(('---------Functional data will be split into '
           + str(varNumChnk)
           + ' batches'))

    # Vector with the indices at which the functional data will be separated
    # in order to be chunked up for the parallel processes:
    vecIdxChnks = np.linspace(0, varNumVox, num=varNumChnk, endpoint=False)
    vecIdxChnks = np.hstack((vecIdxChnks, varNumVox))

    # List into which the chunks of functional data are put:
    lstFunc = [None] * varNumChnk

    # Put functional data into chunks:
    for idxChnk in range(0, varNumChnk):
        # Index of first voxel to be included in current chunk:
        varChnkStr = int(vecIdxChnks[idxChnk])
        # Index of last voxel to be included in current chunk:
        varChnkEnd = int(vecIdxChnks[(idxChnk + 1)])
        # Put voxel array into list:
        lstFunc[idxChnk] = aryFunc[:, varChnkStr:varChnkEnd]

    # We delete the original array holding the functional data to conserve
    # memory. Therefore, we first need to calculate the mean (will be needed
    # for calculation of R2). After finding the best fitting model for each
    # voxel, we still have to calculate the coefficient of determination
    # (R-squared) for each voxel. We start by calculating the total sum of
    # squares (i.e. the deviation of the data from the mean). The mean of
    # each time course:
    vecFuncMean = np.mean(aryFunc, axis=0)
    # Deviation from the mean for each datapoint:
    vecFuncDev = np.subtract(aryFunc, vecFuncMean[None, :])
    # Sum of squares:
    vecSsTot = np.sum(np.power(vecFuncDev, 2.0), axis=0)

    # We don't need the original array with the functional data anymore (the
    # above seems to have created a hard copy):
    del(vecFuncDev)
    del(aryFunc)

    # -------------------------------------------------------------------------
    # *** Miscellaneous preparations

    # Vector for minimum squared residuals:
    vecResSsMin = np.zeros((varNumVox), dtype=np.float32)

    # Vector for indices of models with minimum residuals:
    vecResSsMinIdx = np.zeros((varNumVox), dtype=np.int32)

    # L2 regularization factor for regression:
    # varL2reg = 0.0

    # Reduce logging verbosity:
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # Processing unit (GPU by default, if not available, will be set to CPU).
    strPu = '/gpu:0'

    # Check whether GPU is available. Otherwise, CPU will be used. The GPU
    # availability is inferred from an environmental variable:
    strDevice = os.environ.get('CUDA_VISIBLE_DEVICES')
    if strDevice is None:
        strPu = '/cpu:0'
        strTmp = ('---WARNING: Environmental variable CUDA_VISIBLE_DEVICES \n'
                  + '   is not defined, assuming GPU is not available. \n'
                  + '   Will use CPU instead.')
        print(strTmp)

    # -------------------------------------------------------------------------
    # *** Prepare status indicator

    # We create a status indicator for the time consuming pRF model finding
    # algorithm. Number of steps of the status indicator:
    varStsStpSze = 20

    # Vector with pRF values at which to give status feedback:
    vecStatPrf = np.linspace(0,
                             (varNumMdls * varNumChnk),
                             num=(varStsStpSze + 1),
                             endpoint=True)
    vecStatPrf = np.ceil(vecStatPrf)
    vecStatPrf = vecStatPrf.astype(int)

    # Vector with corresponding percentage values at which to give status
    # feedback:
    vecStatPrc = np.linspace(0, 100, num=(varStsStpSze + 1), endpoint=True)
    vecStatPrc = np.ceil(vecStatPrc)
    vecStatPrc = vecStatPrc.astype(int)

    # Counter for status indicator:
    varCntSts01 = 0
    varCntSts02 = 0

    # -------------------------------------------------------------------------
    # *** Loop through chunks

    print('------Run graph')

    for idxChnk in range(varNumChnk):
        # print(('---------Chunk: ' + str(idxChnk)))

        # Define session:
        # objSess = tf.Session()
        with tf.Graph().as_default(), tf.Session() as objSess:

            # ------------------------------------------------------------------
            # *** Prepare queue

            # print('------Define computational graph, queue & session')

            # Queue capacity:
            varCapQ = 10

            # Dimensions of placeholder have to be determined outside of the
            # tensor object, otherwise the object on which the size is
            # calculated is loaded into GPU memory.
            varDim01 = lstPrfTc[0].shape[0]
            varDim02 = lstPrfTc[0].shape[1]

            # The queue:
            objQ = tf.FIFOQueue(capacity=varCapQ,
                                dtypes=[tf.float32],
                                shapes=[(varDim01, varDim02)])

            # Method for getting queue size:
            objSzeQ = objQ.size()

            # Placeholder that is used to put design matrix on computational
            # graph:
            objPlcHld01 = tf.placeholder(tf.float32,
                                         shape=[varDim01, varDim02])

            # The enqueue operation that puts data on the graph.
            objEnQ = objQ.enqueue([objPlcHld01])

            # Number of threads that will be created:
            varNumThrd = 1

            # The queue runner (places the enqueue operation on the queue?).
            objRunQ = tf.train.QueueRunner(objQ, [objEnQ] * varNumThrd)
            tf.train.add_queue_runner(objRunQ)

            # The tensor object that is retrieved from the queue. Functions
            # like placeholders for the data in the queue when defining the
            # graph.
            objDsng = objQ.dequeue()

            # Coordinator needs to be initialised:
            objCoord = tf.train.Coordinator()

            # ------------------------------------------------------------------
            # *** Fill queue

            # Buffer size (number of samples to put on queue before starting
            # execution of graph):
            varBuff = 10

            # Define & run extra thread with graph that places data on queue:
            objThrd = threading.Thread(target=funcPlcIn)
            objThrd.setDaemon(True)
            objThrd.start()

            # Stay in this while loop until the specified number of samples
            # (varBuff) have been placed on the queue.
            varTmpSzeQ = 0
            while varTmpSzeQ < varBuff:
                varTmpSzeQ = objSess.run(objSzeQ)

            # ------------------------------------------------------------------
            # *** Prepare & run the graph

            # Chunk of functional data:
            aryTmp01 = np.copy(lstFunc[idxChnk])
            # Place functional data on GPU or CPU, depending on GPU
            # availability:
            with tf.device(strPu):
                objFunc = tf.Variable(aryTmp01)

            # The computational graph. Operation that solves matrix (in the
            # least squares sense), and calculates residuals along time
            # dimension:
            objMatSlve = tf.reduce_sum(
                tf.squared_difference(
                    objFunc,
                    tf.matmul(
                        objDsng,
                        tf.matmul(
                            tf.matmul(
                                tf.matrix_inverse(
                                    tf.matmul(objDsng,
                                              objDsng,
                                              transpose_a=True,
                                              transpose_b=False)),
                                objDsng,
                                transpose_a=False,
                                transpose_b=True),
                            objFunc)),
                ),
                axis=0)

            # Use GPU, if available:
            with tf.device(strPu):
                # Variables need to be (re-)initialised:
                objSess.run(tf.global_variables_initializer())

            # Mark graph as read-only (would throw an error in case of memory
            # leak):
            objSess.graph.finalize()

            # Index of first voxel in current chunk (needed to assign
            # results):
            varChnkStr = int(vecIdxChnks[idxChnk])

            # Index of last voxel in current chunk (needed to assign
            # results):
            varChnkEnd = int(vecIdxChnks[(idxChnk + 1)])

            # Array for results of current chunk:
            aryTmpRes = np.zeros((varNumMdls, lstFunc[idxChnk].shape[1]),
                                 dtype=np.float32)

            # Loop through models:
            for idxMdl in range(varNumMdls):

                # Run main computational graph and put results in list:
                # varTme01 = time.time()
                aryTmpRes[idxMdl, :] = objSess.run(objMatSlve)
                # print(('---------Time for graph call: '
                #        + str(time.time() - varTme01)))

                # Status indicator:
                if varCntSts02 == vecStatPrf[varCntSts01]:
                    # Number of elements on queue:
                    varTmpSzeQ = objSess.run(objSzeQ)
                    # Prepare status message:
                    strStsMsg = ('---------Progress: '
                                 + str(vecStatPrc[varCntSts01])
                                 + ' % --- Number of elements on queue: '
                                 + str(varTmpSzeQ))
                    print(strStsMsg)
                    # Only increment counter if the last value has not been
                    # reached yet:
                    if varCntSts01 < varStsStpSze:
                        varCntSts01 = varCntSts01 + int(1)

                # Increment status indicator counter:
                varCntSts02 = varCntSts02 + 1

            # Stop threads.
            objCoord.request_stop()
            # objSess.close()

        # Get indices of models with minimum residuals (minimum along
        # model-space) for current chunk:
        vecResSsMinIdx[varChnkStr:varChnkEnd] = np.argmin(
            aryTmpRes, axis=0).astype(np.int32)

        # Get minimum residuals of those models:
        vecResSsMin[varChnkStr:varChnkEnd] = np.min(aryTmpRes, axis=0)

        # Avoid memory overflow between chunks:
        del(aryTmpRes)

    # -------------------------------------------------------------------------
    # *** Post-process results

    print('------Post-processing results')

    # Array for model parameters. At the moment, we have the indices of the
    # best fitting models, so we need an array that tells us what model
    # parameters these indices refer to.
    aryMdl = np.zeros((varNumMdlsTtl, 3), dtype=np.float32)

    # Model parameters can be represented as float32 as well:
    vecMdlXpos = vecMdlXpos.astype(np.float32)
    vecMdlYpos = vecMdlYpos.astype(np.float32)
    vecMdlSd = vecMdlSd.astype(np.float32)

    # The first column is to contain model x positions:
    aryMdl[:, 0] = np.repeat(vecMdlXpos, int(varNumY * varNumPrfSizes))

    # The second column is to contain model y positions:
    aryMdl[:, 1] = np.repeat(np.tile(vecMdlYpos, varNumX), varNumPrfSizes)

    # The third column is to contain model pRF sizes:
    aryMdl[:, 2] = np.tile(vecMdlSd, int(varNumX * varNumY))

    # The above code has the same result as the below (for better
    # readability):
    # aryMdl = np.zeros((varNumMdls, 3), dtype=np.float32)
    # varCount = 0
    # # Loop through pRF models:
    # for idxX in range(0, varNumX):
    #     for idxY in range(0, varNumY):
    #         for idxSd in range(0, varNumPrfSizes):
    #             aryMdl[varCount, 0] = vecMdlXpos[idxX]
    #             aryMdl[varCount, 1] = vecMdlYpos[idxY]
    #             aryMdl[varCount, 2] = vecMdlSd[idxSd]
    #             varCount += 1

    # Earlier, we had removed models with a variance of zero. Thus, those
    # models were ignored and are not present in the results. We remove them
    # from the model-parameter-array:
    aryMdl = aryMdl[vecLgcVar]

    # Retrieve model parameters of 'winning' model for all voxels:
    vecBstXpos = aryMdl[:, 0][vecResSsMinIdx]
    vecBstYpos = aryMdl[:, 1][vecResSsMinIdx]
    vecBstSd = aryMdl[:, 2][vecResSsMinIdx]

    # Coefficient of determination (1 - ratio of (residual sum of squares by
    # total sum of squares)):
    vecBstR2 = np.subtract(1.0, np.divide(vecResSsMin, vecSsTot))

    # Dummy array for PEs - TODO: PE retrieval for GPU.
    # aryBstPe[varNumVoxChnk, varNumCon].
    aryBstPe = np.zeros((varNumVox, 1), dtype=np.float32)

    # Output list:
    lstOut = [idxPrc, vecBstXpos, vecBstYpos, vecBstSd, vecBstR2, aryBstPe]

    queOut.put(lstOut)
        1))
    ### WITHOUT ASYNCHRONOUS DATA LOADING ###
else:
    print("use queue loading")

    ### WITH ASYNCHRONOUS DATA LOADING ###
    train_input_single = tf.placeholder(tf.float32,
                                        shape=(IMG_SIZE[0], IMG_SIZE[1], 1 + noc))
    train_gt_single = tf.placeholder(tf.float32,
                                     shape=(IMG_SIZE[0], IMG_SIZE[1], 1))
    q = tf.FIFOQueue(10000,
                     [tf.float32, tf.float32],
                     [[IMG_SIZE[0], IMG_SIZE[1], 1 + noc],
                      [IMG_SIZE[0], IMG_SIZE[1], 1]])
    enqueue_op = q.enqueue([train_input_single, train_gt_single])
    train_input, train_gt = q.dequeue_many(BATCH_SIZE)
    ### WITH ASYNCHRONOUS DATA LOADING ###

shared_model = tf.make_template('shared_model', model)
#train_output, weights = model(train_input)
train_output, weights = shared_model(train_input)
loss = tf.reduce_sum((train_output - train_gt) ** 2)
for w in weights:
    loss += tf.nn.l2_loss(w) * 1e-4

global_step = tf.Variable(0, trainable=False)
def __init__(self, network_name, checkpoint_path, batch_size, num_classes,
             image_size=None, preproc_func_name=None, preproc_threads=2):
    '''
    TensorFlow feature extractor using tf.slim and models/slim.
    Core functionalities are loading network architecture, pretrained weights,
    setting up an image pre-processing function, and queues for fast input
    reading. The main workflow after initialization is first loading a list of
    image files using the `enqueue_image_files` function and then pushing them
    through the network with `feed_forward_batch`.

    For pre-trained networks and some more explanation, checkout:
      https://github.com/tensorflow/models/tree/master/slim

    :param network_name: str, network name (e.g. resnet_v1_101)
    :param checkpoint_path: str, full path to checkpoint file to load
    :param batch_size: int, batch size
    :param num_classes: int, number of output classes
    :param image_size: int, width and height to overrule default_image_size (default=None)
    :param preproc_func_name: func, optional to overwrite default processing (default=None)
    :param preproc_threads: int, number of input threads (default=1)
    '''
    self._network_name = network_name
    self._checkpoint_path = checkpoint_path
    self._batch_size = batch_size
    self._num_classes = num_classes
    self._image_size = image_size
    self._preproc_func_name = preproc_func_name
    self._num_preproc_threads = preproc_threads

    self._global_step = tf.train.get_or_create_global_step()

    # Retrieve the function that returns logits and endpoints
    self._network_fn = nets_factory.get_network_fn(
        self._network_name, num_classes=num_classes, is_training=False)

    # Retrieve the model scope from network factory
    self._model_scope = nets_factory.arg_scopes_map[self._network_name]

    # Fetch the default image size
    self._image_size = self._network_fn.default_image_size

    # Setup the input pipeline with a queue of filenames
    self._filename_queue = tf.FIFOQueue(100000, [tf.string], shapes=[[]],
                                        name="filename_queue")
    self._pl_image_files = tf.placeholder(tf.string, shape=[None],
                                          name="image_file_list")
    self._enqueue_op = self._filename_queue.enqueue_many([self._pl_image_files])
    self._num_in_queue = self._filename_queue.size()

    # Image reader and preprocessing
    self._batch_from_queue, self._batch_filenames = \
        self._preproc_image_batch(self._batch_size, num_threads=preproc_threads)

    # Either use the placeholder as inputs or feed from queue
    self._image_batch = tf.placeholder_with_default(
        self._batch_from_queue,
        shape=[None, self._image_size, self._image_size, 3])

    # Retrieve the logits and network endpoints (for extracting activations)
    # Note: endpoints is a dictionary with endpoints[name] = tf.Tensor
    self._logits, self._endpoints = self._network_fn(self._image_batch)

    # Find the checkpoint file
    checkpoint_path = self._checkpoint_path
    if tf.gfile.IsDirectory(self._checkpoint_path):
        checkpoint_path = tf.train.latest_checkpoint(self._checkpoint_path)

    # Load pre-trained weights into the model (use the resolved path so that
    # a checkpoint directory is handled correctly)
    variables_to_restore = slim.get_variables_to_restore()
    restore_fn = slim.assign_from_checkpoint_fn(checkpoint_path,
                                                variables_to_restore)

    # Start the session and load the pre-trained weights
    self._sess = tf.Session()
    restore_fn(self._sess)

    # Local variables initializer, needed for queues etc.
    self._sess.run(tf.local_variables_initializer())

    # Managing the queues and threads
    self._coord = tf.train.Coordinator()
    self._threads = tf.train.start_queue_runners(coord=self._coord,
                                                 sess=self._sess)
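# The workflow described in the docstring, sketched end to end. The class
# name FeatureExtractor, the layer name, and the feed_forward_batch signature
# are assumptions for illustration; only enqueue_image_files and
# feed_forward_batch are actually referenced above.
extractor = FeatureExtractor(network_name='resnet_v1_101',
                             checkpoint_path='/path/to/resnet_v1_101.ckpt',
                             batch_size=32,
                             num_classes=1000)
extractor.enqueue_image_files(image_files)  # assumed list of image paths
outputs = extractor.feed_forward_batch(['resnet_v1_101/logits'])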
def batch_inputs(dataset, batch_size, train, num_preprocess_threads=None,
                 num_readers=1):
    """Construct batches of training or evaluation examples from the image dataset.

    Args:
        dataset: instance of Dataset class specifying the dataset.
            See dataset.py for details.
        batch_size: integer
        train: boolean
        num_preprocess_threads: integer, total number of preprocessing threads
        num_readers: integer, number of parallel readers

    Returns:
        images: 4-D float Tensor of a batch of images
        labels: 1-D integer Tensor of [batch_size].

    Raises:
        ValueError: if data is not found
    """
    with tf.name_scope('batch_processing'):
        data_files = dataset.data_files()
        if data_files is None:
            raise ValueError('No data files found for this dataset')

        # Create filename_queue
        if train:
            filename_queue = tf.train.string_input_producer(data_files,
                                                            shuffle=True,
                                                            capacity=16)
        else:
            filename_queue = tf.train.string_input_producer(data_files,
                                                            shuffle=False,
                                                            capacity=1)
        if num_preprocess_threads is None:
            num_preprocess_threads = FLAGS.num_preprocess_threads

        if num_preprocess_threads % 4:
            raise ValueError('Please make num_preprocess_threads a multiple '
                             'of 4 (%d %% 4 != 0).' % num_preprocess_threads)

        if num_readers is None:
            num_readers = FLAGS.num_readers

        if num_readers < 1:
            raise ValueError('Please make num_readers at least 1')

        # Approximate number of examples per shard.
        examples_per_shard = 1024
        # Size the random shuffle queue to balance between good global
        # mixing (more examples) and memory use (fewer examples).
        # 1 image uses 299*299*3*4 bytes = 1MB
        # The default input_queue_memory_factor is 16 implying a shuffling queue
        # size: examples_per_shard * 16 * 1MB = 17.6GB
        min_queue_examples = examples_per_shard * FLAGS.input_queue_memory_factor
        if train:
            examples_queue = tf.RandomShuffleQueue(
                capacity=min_queue_examples + 3 * batch_size,
                min_after_dequeue=min_queue_examples,
                dtypes=[tf.string])
        else:
            examples_queue = tf.FIFOQueue(
                capacity=examples_per_shard + 3 * batch_size,
                dtypes=[tf.string])

        # Create multiple readers to populate the queue of examples.
        if num_readers > 1:
            enqueue_ops = []
            for _ in range(num_readers):
                reader = dataset.reader()
                _, value = reader.read(filename_queue)
                enqueue_ops.append(examples_queue.enqueue([value]))

            tf.train.queue_runner.add_queue_runner(
                tf.train.queue_runner.QueueRunner(examples_queue, enqueue_ops))
            example_serialized = examples_queue.dequeue()
        else:
            reader = dataset.reader()
            _, example_serialized = reader.read(filename_queue)

        images_and_labels = []
        for thread_id in range(num_preprocess_threads):
            # Parse a serialized Example proto to extract the image and metadata.
            image_buffer, label_index, bbox, _ = parse_example_proto(
                example_serialized)
            image = image_preprocessing(image_buffer, bbox, train, thread_id)
            images_and_labels.append([image, label_index])

        images, label_index_batch = tf.train.batch_join(
            images_and_labels,
            batch_size=batch_size,
            capacity=2 * num_preprocess_threads * batch_size)

        # Reshape images into these desired dimensions.
        height = FLAGS.image_size
        width = FLAGS.image_size
        depth = 3

        images = tf.cast(images, tf.float32)
        images = tf.reshape(images, shape=[batch_size, height, width, depth])

        # Display the training images in the visualizer.
        tf.summary.image('images', images)

        return images, tf.reshape(label_index_batch, [batch_size])
def preprocessing(self):
    with tf.name_scope("PreProcessing"):
        with tf.name_scope("ReadRawData"):
            coord = tf.train.Coordinator()
            #enq_threads = tf.train.start_queue_runners(self.session, coord)
            #print(enq_threads)
            try:
                #while True:
                #self.session.run(tf.local_variables_initializer())
                #file = self.session.run(self.fetch_q.dequeue(name="DequeFiles"))
                mnist = tf.contrib.learn.datasets.load_dataset("mnist")
                self.train_images = mnist.train.images  # Returns np.array
                self.train_images = tf.cast(self.train_images, dtype=tf.float32)
                self.train_images = tf.Session().run(self.train_images)
                self.train_labels = np.asarray(mnist.train.labels,
                                               dtype=np.float32)
                self.test_images = mnist.test.images
                self.test_labels = np.asarray(mnist.train.labels,
                                              dtype=np.int32)
                #print(self.train_labels)
                if (coord.should_stop()):
                    pass
            except Exception as e:
                coord.request_stop(e)
                print(e.message, " ", "80", "\n")
            else:
                coord.request_stop()
                #coord.join(enq_threads)

        with tf.name_scope("ProcessRawData"):
            try:
                '''os.system("rscript analysis.r")'''
                length = len(self.train_images)
                print(length)
            except Exception as e:
                print(e)
            else:
                self.img_q = tf.FIFOQueue(length, tf.float32, name="IMGQueue")
                self.lab_q = tf.FIFOQueue(length, tf.float32, name="LABQueue")
                self.img = tf.data.Dataset().from_tensor_slices(
                    self.train_images[2800:])
                #self.img = self.img.prefetch(100)
                print(self.img)
                self.lab = tf.data.Dataset().from_tensor_slices(
                    self.train_labels[2800:])
                #self.lab = self.lab.prefetch(100)
                print(self.lab)
                img_iterator = self.img.make_initializable_iterator()
                img = img_iterator.get_next()
                lab_iterator = self.lab.make_initializable_iterator()
                lab = lab_iterator.get_next()
                #self.session.run(iterator.initializer)
                #print(self.session.run(element), "\n")
                enq_img = self.img_q.enqueue(img, name="EnqueueIMGData")
                enq_lab = self.lab_q.enqueue(lab, name="EnqueueLABData")
                self.img_qr = tf.train.QueueRunner(self.img_q, [enq_img] * 128)
                self.lab_qr = tf.train.QueueRunner(self.lab_q, [enq_lab] * 128)

        with tf.name_scope("SaveTFRecords"):
            try:
                coord = tf.train.Coordinator()
                self.session.run(img_iterator.initializer)
                self.session.run(lab_iterator.initializer)
                img_enq_threads = self.img_qr.create_threads(self.session,
                                                             coord, start=True)
                lab_enq_threads = self.lab_qr.create_threads(self.session,
                                                             coord, start=True)
                #print(img_enq_threads)
                #print(lab_enq_threads)
                #vector_data = []
                meta = {
                    "type": self.file_schema["dataset"],
                    "name": "TrainDataset2",
                    "cmd": "",
                    "vector": ""
                }
                meta["cmd"] = "open"
                self.WriteTFRecords(meta)
                while True:
                    self.session.run(tf.local_variables_initializer())
                    if (not coord.should_stop()):
                        img, lab = self.session.run([
                            self.img_q.dequeue(name="DequeueIMGData"),
                            self.lab_q.dequeue(name="DequeueLABData")
                        ])
                        #print(len(img), " ", lab, "\n")
                        lab = tf.one_hot(lab, 10)
                        #print(self.session.run(lab))
                        lab = self.session.run(lab)
                        vector_data = [
                            tf.train.Feature(
                                float_list=tf.train.FloatList(value=img)),
                            tf.train.Feature(
                                float_list=tf.train.FloatList(value=lab))
                        ]
                        meta["cmd"] = "wrt"
                        meta["vector"] = vector_data
                        self.WriteTFRecords(meta)
                    else:
                        break
            except Exception as e:
                coord.request_stop(e)
                coord.join(img_enq_threads)
                #coord.join(lab_enq_threads)
                meta["vector"] = []
                print(e.message, "\n")
            finally:
                meta["cmd"] = "close"
                self.WriteTFRecords(meta)

    print("preprocessing done")
def set_dataqueues(self, cluster):
    '''sets the data queues'''

    #check if running in distributed model
    self.data_queue = dict()
    for linkedset in self.linkedsets:

        data_queue_name = 'data_queue_%s_%s' % (self.task_name, linkedset)

        if 'local' in cluster.as_dict():

            data_queue_elements, _ = input_pipeline.get_filenames(
                self.input_dataconfs[linkedset] +
                self.target_dataconfs[linkedset])

            number_of_elements = len(data_queue_elements)
            if 'trainset_frac' in self.taskconf:
                number_of_elements = int(
                    float(number_of_elements) *
                    float(self.taskconf['trainset_frac']))
            print('%d utterances will be used for training'
                  % number_of_elements)

            data_queue_elements = data_queue_elements[:number_of_elements]

            #create the data queue and queue runners
            self.data_queue[linkedset] = tf.train.string_input_producer(
                string_tensor=data_queue_elements,
                shuffle=False,
                seed=None,
                capacity=self.batch_size * 2,
                shared_name=data_queue_name)

            #compute the number of steps
            if int(self.trainerconf['numbatches_to_aggregate']) == 0:
                num_steps = (int(self.trainerconf['num_epochs'])
                             * len(data_queue_elements)
                             / self.batch_size)
            else:
                num_steps = (int(self.trainerconf['num_epochs'])
                             * len(data_queue_elements)
                             / (self.batch_size *
                                int(self.trainerconf['numbatches_to_aggregate'])))

            done_ops = [tf.no_op()]

        else:
            #get the data queue
            self.data_queue[linkedset] = tf.FIFOQueue(
                capacity=self.batch_size * (num_replicas + 1),
                shared_name=data_queue_name,
                name=data_queue_name,
                dtypes=[tf.string],
                shapes=[[]])

            #get the number of steps from the parameter server
            num_steps_queue = tf.FIFOQueue(
                capacity=num_replicas,
                dtypes=[tf.int32],
                shared_name='num_steps_queue',
                name='num_steps_queue',
                shapes=[[]])

            #set the number of steps
            num_steps = num_steps_queue.dequeue()

            #get the done queues
            done_ops = []
            for i in range(num_servers):
                with tf.device('job:ps/task:%d' % i):
                    done_queue = tf.FIFOQueue(
                        capacity=num_replicas,
                        dtypes=[tf.bool],
                        shapes=[[]],
                        shared_name='done_queue%d' % i,
                        name='done_queue%d' % i)
                    done_ops.append(done_queue.enqueue(True))

    return num_steps, done_ops
def train(epochs, batch_size, learning_rate, dropout, momentum, lmbda, resume,
          imagenet_path, display_step, test_step, ckpt_path, summary_path):
    """ Procedure to train the model on ImageNet ILSVRC 2012 training set

    Args:
        resume: boolean variable, true if want to resume the training,
            false to train from scratch
        imagenet_path: path to ILSRVC12 ImageNet folder containing train
            images, validation images, annotations and metadata file
        display_step: number representing how often printing the current
            training accuracy
        test_step: number representing how often make a test and print
            the validation accuracy
        ckpt_path: path where to save model's tensorflow checkpoint
            (or from where resume)
        summary_path: path where to save logs for TensorBoard
    """
    train_img_path = os.path.join(imagenet_path, 'ILSVRC2012_img_train')
    ts_size = tu.imagenet_size(train_img_path)
    num_batches = int(float(ts_size) / batch_size)

    wnid_labels, _ = tu.load_imagenet_meta(
        os.path.join(imagenet_path, 'data/meta.mat'))

    x = tf.placeholder(tf.float32, [None, 224, 224, 3])
    y = tf.placeholder(tf.float32, [None, 1000])

    lr = tf.placeholder(tf.float32)
    keep_prob = tf.placeholder(tf.float32)

    # queue of examples being filled on the cpu
    with tf.device('/cpu:0'):
        q = tf.FIFOQueue(batch_size * 3, [tf.float32, tf.float32],
                         shapes=[[224, 224, 3], [1000]])
        enqueue_op = q.enqueue_many([x, y])
        x_b, y_b = q.dequeue_many(batch_size)

    pred, _ = alexnet.classifier(x_b, keep_prob)

    # cross-entropy and weight decay
    with tf.name_scope('cross_entropy'):
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y_b,
                                                    name='cross-entropy'))

    with tf.name_scope('l2_loss'):
        l2_loss = tf.reduce_sum(
            lmbda * tf.stack([tf.nn.l2_loss(v)
                              for v in tf.get_collection('weights')]))
        tf.summary.scalar('l2_loss', l2_loss)

    with tf.name_scope('loss'):
        loss = cross_entropy + l2_loss
        tf.summary.scalar('loss', loss)

    # accuracy
    with tf.name_scope('accuracy'):
        correct = tf.equal(tf.argmax(pred, 1), tf.argmax(y_b, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
        tf.summary.scalar('accuracy', accuracy)

    global_step = tf.Variable(0, trainable=False)
    epoch = tf.div(global_step, num_batches)

    # momentum optimizer
    with tf.name_scope('optimizer'):
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=lr, momentum=momentum).minimize(
                loss, global_step=global_step)

    # merge summaries to write them to file
    merged = tf.summary.merge_all()

    # checkpoint saver
    saver = tf.train.Saver()
    coord = tf.train.Coordinator()

    # init = tf.initialize_all_variables()
    init = tf.global_variables_initializer()

    with tf.Session(config=tf.ConfigProto()) as sess:
        if resume:
            saver.restore(sess, os.path.join(ckpt_path, 'alexnet-cnn.ckpt'))
        else:
            sess.run(init)

        # enqueuing batches procedure
        def enqueue_batches():
            while not coord.should_stop():
                im, l = tu.read_batch(batch_size, train_img_path, wnid_labels)
                sess.run(enqueue_op, feed_dict={x: im, y: l})

        # creating and starting parallel threads to fill the queue
        num_threads = 3
        for i in range(num_threads):
            t = threading.Thread(target=enqueue_batches)
            t.setDaemon(True)
            t.start()

        # operation to write logs for tensorboard visualization
        train_writer = tf.summary.FileWriter(
            os.path.join(summary_path, 'train'), sess.graph)

        start_time = time.time()
        for e in range(sess.run(epoch), epochs):
            for i in range(num_batches):
                _, step = sess.run([optimizer, global_step],
                                   feed_dict={lr: learning_rate,
                                              keep_prob: dropout})
                # train_writer.add_summary(summary, step)

                # decaying learning rate
                if step == 170000 or step == 350000:
                    learning_rate /= 10

                # display current training information
                if step % display_step == 0:
                    c, a = sess.run([loss, accuracy],
                                    feed_dict={lr: learning_rate,
                                               keep_prob: 1.0})
                    print('Epoch: {:03d} Step/Batch: {:09d} --- Loss: {:.7f} '
                          'Training accuracy: {:.4f}'.format(e, step, c, a))

                # make test and evaluate validation accuracy
                if step % test_step == 0:
                    val_im, val_cls = tu.read_validation_batch(
                        batch_size,
                        os.path.join(imagenet_path, 'ILSVRC2012_img_val'),
                        os.path.join(
                            imagenet_path,
                            'data/ILSVRC2012_validation_ground_truth.txt'))
                    v_a = sess.run(accuracy,
                                   feed_dict={x_b: val_im,
                                              y_b: val_cls,
                                              lr: learning_rate,
                                              keep_prob: 1.0})
                    # intermediate time
                    int_time = time.time()
                    print('Elapsed time: {}'.format(
                        tu.format_time(int_time - start_time)))
                    print('Validation accuracy: {:.04f}'.format(v_a))
                    # save weights to file
                    save_path = saver.save(
                        sess, os.path.join(ckpt_path, 'alexnet-cnn.ckpt'))
                    print('Variables saved in file: %s' % save_path)

        end_time = time.time()
        print('Elapsed time: {}'.format(tu.format_time(end_time - start_time)))
        save_path = saver.save(sess, os.path.join(ckpt_path, 'alexnet-cnn.ckpt'))
        print('Variables saved in file: %s' % save_path)

        coord.request_stop()
def train(action_set, level_names): """Train.""" if is_single_machine(): local_job_device = '' shared_job_device = '' is_actor_fn = lambda i: True is_learner = True global_variable_device = '/gpu' server = tf.train.Server.create_local_server() filters = [] # print("Type of atari data structure: ", type(level_names)) # print("Should be atari games: ", level_names[0]) else: local_job_device = '/job:%s/task:%d' % (FLAGS.job_name, FLAGS.task) shared_job_device = '/job:learner/task:0' is_actor_fn = lambda i: FLAGS.job_name == 'actor' and i == FLAGS.task is_learner = FLAGS.job_name == 'learner' # Placing the variable on CPU, makes it cheaper to send it to all the # actors. Continual copying the variables from the GPU is slow. global_variable_device = shared_job_device + '/cpu' cluster = tf.train.ClusterSpec({ 'actor': ['localhost:%d' % (8001 + i) for i in range(FLAGS.num_actors)], 'learner': ['localhost:8000'] }) server = tf.train.Server(cluster, job_name=FLAGS.job_name, task_index=FLAGS.task) filters = [shared_job_device, local_job_device] # Only used to find the actor output structure. with tf.Graph().as_default(): env = create_atari_environment(level_names[0], seed=1) agent = Agent(len(action_set)) structure = build_actor(agent, env, level_names[0], action_set) flattened_structure = nest.flatten(structure) dtypes = [t.dtype for t in flattened_structure] shapes = [t.shape.as_list() for t in flattened_structure] with tf.Graph().as_default(), \ tf.device(local_job_device + '/cpu'), \ pin_global_variables(global_variable_device): tf.set_random_seed(1) # Makes initialization deterministic. # Create Queue and Agent on the learner. with tf.device(shared_job_device): queue = tf.FIFOQueue(1, dtypes, shapes, shared_name='buffer') agent = Agent(len(action_set)) if is_single_machine() and 'dynamic_batching' in sys.modules: # For single machine training, we use dynamic batching for improved GPU # utilization. The semantics of single machine training are slightly # different from the distributed setting because within a single unroll # of an environment, the actions may be computed using different weights # if an update happens within the unroll. old_build = agent._build @dynamic_batching.batch_fn def build(*args): # print("experiment.py: args: ", args) with tf.device('/gpu'): return old_build(*args) tf.logging.info('Using dynamic batching.') agent._build = build # Build actors and ops to enqueue their output. enqueue_ops = [] for i in range(FLAGS.num_actors): if is_actor_fn(i): level_name = level_names[i % len(level_names)] tf.logging.info('Creating actor %d with level %s', i, level_name) env = create_atari_environment(level_name, seed=i + 1) # TODO: Modify to atari environment actor_output = build_actor(agent, env, level_name, action_set) # print("Actor output is: ", actor_output) with tf.device(shared_job_device): enqueue_ops.append( queue.enqueue(nest.flatten(actor_output))) # If running in a single machine setup, run actors with QueueRunners # (separate threads). if is_learner and enqueue_ops: tf.train.add_queue_runner(tf.train.QueueRunner(queue, enqueue_ops)) # Build learner. if is_learner: # Create global step, which is the number of environment frames processed. tf.get_variable('num_environment_frames', initializer=tf.zeros_initializer(), shape=[], dtype=tf.int64, trainable=False, collections=[ tf.GraphKeys.GLOBAL_STEP, tf.GraphKeys.GLOBAL_VARIABLES ]) # Create batch (time major) and recreate structure. 
      dequeued = queue.dequeue_many(FLAGS.batch_size)
      dequeued = nest.pack_sequence_as(structure, dequeued)

      def make_time_major(s):
        return nest.map_structure(
            lambda t: tf.transpose(t, [1, 0] + list(range(t.shape.ndims))[2:]),
            s)

      dequeued = dequeued._replace(
          env_outputs=make_time_major(dequeued.env_outputs),
          agent_outputs=make_time_major(dequeued.agent_outputs))

      with tf.device('/gpu'):
        # Using StagingArea allows us to prepare the next batch and send it to
        # the GPU while we're performing a training step. This adds up to one
        # step of policy lag.
        flattened_output = nest.flatten(dequeued)
        area = tf.contrib.staging.StagingArea(
            [t.dtype for t in flattened_output],
            [t.shape for t in flattened_output])
        stage_op = area.put(flattened_output)

        data_from_actors = nest.pack_sequence_as(structure, area.get())

        # Unroll agent on sequence, create losses and update ops.
        output = build_learner(agent,
                               data_from_actors.agent_state,
                               data_from_actors.env_outputs,
                               data_from_actors.agent_outputs)

    # Create MonitoredSession (to run the graph, checkpoint and log).
    tf.logging.info('Creating MonitoredSession, is_chief %s', is_learner)
    config = tf.ConfigProto(allow_soft_placement=True, device_filters=filters)
    with tf.train.MonitoredTrainingSession(
        server.target,
        is_chief=is_learner,
        checkpoint_dir=FLAGS.logdir,
        save_checkpoint_secs=600,
        save_summaries_secs=30,
        log_step_count_steps=50000,
        config=config,
        hooks=[py_process.PyProcessHook()]) as session:

      if is_learner:
        # Logging. TODO: Modify this to be able to handle atari.
        level_returns = {level_name: [] for level_name in level_names}
        # Re-enable the summary writer if needed.
        # summary_writer = tf.summary.FileWriterCache.get(FLAGS.logdir)

        # Prepare data for first run.
        session.run_step_fn(
            lambda step_context: step_context.session.run(stage_op))

        # Execute learning and track performance.
        num_env_frames_v = 0
        total_episode_frames = 0
        average_frames = 24000  # TODO: Modify for Atari.
        total_episode_return = 0.0

        while num_env_frames_v < FLAGS.total_environment_frames:
          level_names_v, done_v, infos_v, num_env_frames_v, _ = session.run(
              (data_from_actors.level_name,) + output + (stage_op,))
          level_names_v = np.repeat([level_names_v], done_v.shape[0], 0)
          total_episode_frames = num_env_frames_v

          for level_name, episode_return, episode_step in zip(
              level_names_v[done_v],
              infos_v.episode_return[done_v],
              infos_v.episode_step[done_v]):
            episode_frames = episode_step * FLAGS.num_action_repeats
            total_episode_return += episode_return

            tf.logging.info('Level: %s Episode return: %f after %d frames',
                            level_name, episode_return, num_env_frames_v)

            summary = tf.summary.Summary()
            summary.value.add(tag=level_name + '/episode_return',
                              simple_value=episode_return)
            summary.value.add(tag=level_name + '/episode_frames',
                              simple_value=episode_frames)
            # summary_writer.add_summary(summary, num_env_frames_v)

            # TODO: refactor.
            level_returns[level_name].append(episode_return)

          # Log the total reward accumulated over the last `average_frames`
          # frames.
          if total_episode_frames % average_frames == 0:
            with open('logging.txt', 'a+') as f:
              f.write('Total frames: %d total_return: %f last %d frames\n'
                      % (num_env_frames_v, total_episode_return,
                         average_frames))
            total_episode_return = 0
            total_episode_frames = 0

          current_episode_return_list = min(map(len, level_returns.values()))
          if current_episode_return_list >= 1:
            no_cap = utilities_atari.compute_human_normalized_score(
                level_returns, per_level_cap=None)
            cap_100 = utilities_atari.compute_human_normalized_score(
                level_returns, per_level_cap=100)

            if total_episode_frames % average_frames == 0:
              with open('multi-actors-output.txt', 'a+') as f:
                f.write('total_return %f last %d frames\n'
                        % (total_episode_return, average_frames))
                f.write('no cap: %f after %d frames\n'
                        % (no_cap, num_env_frames_v))
                f.write('cap 100: %f after %d frames\n'
                        % (cap_100, num_env_frames_v))

            summary = tf.summary.Summary()
            summary.value.add(tag='atari/training_no_cap',
                              simple_value=no_cap)
            summary.value.add(tag='atari/training_cap_100',
                              simple_value=cap_100)
            # summary_writer.add_summary(summary, num_env_frames_v)

            # Clear level scores.
            level_returns = {level_name: [] for level_name in level_names}
      else:
        # Execute actors (they just need to enqueue their output).
        while True:
          session.run(enqueue_ops)
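# A minimal sketch of the StagingArea prefetch pattern used above, reduced to a
# single float batch. The tensors here are stand-ins, not the real learner
# graph; the point is that the `stage_op` for step N+1 runs in the same
# session.run() as the training op for step N, overlapping the host-to-device
# copy with compute.
import tensorflow as tf

batch = tf.random_normal([32, 84, 84, 4])   # stand-in for a dequeued batch
area = tf.contrib.staging.StagingArea([tf.float32], [batch.shape])
stage_op = area.put([batch])
staged_batch = area.get()[0]
train_op = tf.reduce_mean(staged_batch)     # stand-in for the real update

with tf.Session() as sess:
    sess.run(stage_op)                      # prime the pipeline once
    for _ in range(10):
        # Train on the previously staged batch while staging the next one.
        _, _ = sess.run([train_op, stage_op])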
import tensorflow as tf

# Create a FIFO queue and initialize it by enqueueing the three numbers
# 0.1, 0.2 and 0.3.
q = tf.FIFOQueue(3, "float")
init = q.enqueue_many(([0.1, 0.2, 0.3],))

# Define the dequeue, +1 and enqueue operations.
x = q.dequeue()
y = x + 1
q_inc = q.enqueue([y])

# Open a session, run q_inc twice and inspect the queue contents.
with tf.Session() as sess:
    sess.run(init)
    quelen = sess.run(q.size())
    for i in range(2):
        sess.run(q_inc)  # after 2 runs the queue holds 0.3, 1.1, 1.2
    quelen = sess.run(q.size())
    for i in range(quelen):
        print(sess.run(q.dequeue()))  # print the queue values
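# A small follow-on sketch (not part of the snippet above): dequeue_many pulls
# several elements in one op. Note that dequeue_many requires the queue to be
# built with fully specified element shapes, here scalars (shape []).
import tensorflow as tf

q = tf.FIFOQueue(4, tf.float32, shapes=[[]])
init = q.enqueue_many(([1.0, 2.0, 3.0, 4.0],))
pair = q.dequeue_many(2)  # returns a tensor of shape [2]

with tf.Session() as sess:
    sess.run(init)
    print(sess.run(pair))  # -> [1. 2.]
    print(sess.run(pair))  # -> [3. 4.]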
def _create_generator(self, input_batch):
    init_ops = []
    push_ops = []
    outputs = []
    current_layer = input_batch

    q = tf.FIFOQueue(1,
                     dtypes=tf.float32,
                     shapes=(self.batch_size, self.quantization_channels))
    init = q.enqueue_many(
        tf.zeros((1, self.batch_size, self.quantization_channels)))

    current_state = q.dequeue()
    push = q.enqueue([current_layer])
    init_ops.append(init)
    push_ops.append(push)

    current_layer = self._generator_causal_layer(
        current_layer, current_state,
        self.quantization_channels, self.residual_channels)

    # Add all defined dilation layers.
    with tf.name_scope('dilated_stack'):
        for layer_index, dilation in enumerate(self.dilations):
            with tf.name_scope('layer{}'.format(layer_index)):
                q = tf.FIFOQueue(dilation,
                                 dtypes=tf.float32,
                                 shapes=(self.batch_size,
                                         self.residual_channels))
                init = q.enqueue_many(
                    tf.zeros((dilation, self.batch_size,
                              self.residual_channels)))

                current_state = q.dequeue()
                push = q.enqueue([current_layer])
                init_ops.append(init)
                push_ops.append(push)

                output, current_layer = self._generator_dilation_layer(
                    current_layer, current_state, layer_index, dilation,
                    self.residual_channels, self.dilation_channels,
                    self.skip_channels)
                outputs.append(output)

    self.init_ops = init_ops
    self.push_ops = push_ops

    with tf.name_scope('postprocessing'):
        # Perform (+) -> ReLU -> 1x1 conv -> ReLU -> 1x1 conv to
        # postprocess the output.
        w1 = tf.Variable(
            tf.truncated_normal([1, self.skip_channels, self.skip_channels],
                                stddev=0.3),
            name="postprocess1")
        w2 = tf.Variable(
            tf.truncated_normal([1, self.skip_channels,
                                 self.quantization_channels],
                                stddev=0.3),
            name="postprocess2")

        # Sum up the skip connections from the outputs of each layer.
        total = sum(outputs)
        transformed1 = tf.nn.relu(total)
        conv1 = tf.matmul(transformed1, w1[0, :, :])
        transformed2 = tf.nn.relu(conv1)
        conv2 = tf.matmul(transformed2, w2[0, :, :])

    return conv2
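# A toy sketch of the per-layer queue trick above: each FIFOQueue acts as a
# shift register holding a layer's past activations, so one generation step
# dequeues the state from `dilation` steps ago and enqueues the current one.
# The names and sizes here are made up for illustration.
import tensorflow as tf

dilation, batch, channels = 2, 1, 3
q = tf.FIFOQueue(dilation, dtypes=tf.float32, shapes=(batch, channels))
init = q.enqueue_many(tf.zeros((dilation, batch, channels)))

current_input = tf.placeholder(tf.float32, shape=(batch, channels))
past_state = q.dequeue()            # activation from `dilation` steps ago
push = q.enqueue([current_input])   # remember the current activation

with tf.Session() as sess:
    sess.run(init)
    for step in range(4):
        x = [[float(step)] * channels]
        state, _ = sess.run([past_state, push],
                            feed_dict={current_input: x})
        print(step, state)          # lags the input by `dilation` steps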
import tensorflow as tf

q = tf.FIFOQueue(2, "int32")
init = q.enqueue_many(([0, 10],))
x = q.dequeue()
y = x + 1
q_inc = q.enqueue([y])

options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
run_metadata = tf.RunMetadata()

name = 'tf-queue'
dump_dir = '/tmp/'
graph_path = ''.join([name, '.pbtxt'])
step_stats_path = ''.join([dump_dir, name, '-stepstats.pbtxt'])
tensorboard_path = ''.join([dump_dir, name])
meta_graph_path = ''.join([dump_dir, name, '.meta'])

with tf.Session() as sess:
    init.run()
    for i in range(10):
        v, _ = sess.run([x, q_inc], options=options,
                        run_metadata=run_metadata)
        print(v)
    tf.train.write_graph(sess.graph, dump_dir, graph_path)
    open(step_stats_path, 'w').write(str(run_metadata.step_stats))
    writer = tf.summary.FileWriter(tensorboard_path, sess.graph)
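# The step stats captured above can also be turned into a Chrome trace and
# inspected in chrome://tracing. A brief sketch, assuming `run_metadata` was
# filled by a sess.run() call with FULL_TRACE options as in the snippet:
from tensorflow.python.client import timeline

tl = timeline.Timeline(run_metadata.step_stats)
chrome_trace = tl.generate_chrome_trace_format()
with open('/tmp/tf-queue-timeline.json', 'w') as f:
    f.write(chrome_trace)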
def __init__(self, splited_keys, images, datasets, config_input, augmenter,
             perception_interface):
    # Sample inputs:
    #   splited_keys: _splited_keys_train[i_labels_per_division][i_steering_bins_perc][a list of keys]
    #   images: [i_sensor][i_file_number] = (lastidx, lastidx + x.shape[0], x)
    #   datasets: [i_target_name] = dim*batch matrix, where batch=#all_samples
    #   config_input: configInputs
    #   augmenter: config_input.augment

    # Save the inputs.
    self._splited_keys = splited_keys
    self._images = images
    # Concatenate the datasets; the shape is totalnum * totaldim.
    self._targets = np.concatenate(tuple(datasets), axis=1)
    self._config = config_input
    self._augmenter = augmenter
    self._batch_size = config_input.batch_size

    # Prepare all the placeholders, from 3 sources:
    # _queue_image_input, _queue_targets, _queue_inputs.
    self._queue_image_input = tf.placeholder(
        tf.float32,
        shape=[config_input.batch_size,
               config_input.feature_input_size[0],
               config_input.feature_input_size[1],
               config_input.feature_input_size[2]])
    self._queue_shapes = [self._queue_image_input.shape]

    # config.targets_names: ['wp1_angle', 'wp2_angle', 'Steer', 'Gas', 'Brake', 'Speed']
    self._queue_targets = []
    for i in range(len(self._config.targets_names)):
        self._queue_targets.append(
            tf.placeholder(tf.float32,
                           shape=[config_input.batch_size,
                                  self._config.targets_sizes[i]]))
        self._queue_shapes.append(self._queue_targets[-1].shape)

    # self.inputs_names = ['Control', 'Speed']
    self._queue_inputs = []
    for i in range(len(self._config.inputs_names)):
        self._queue_inputs.append(
            tf.placeholder(tf.float32,
                           shape=[config_input.batch_size,
                                  self._config.inputs_sizes[i]]))
        self._queue_shapes.append(self._queue_inputs[-1].shape)

    self._queue = tf.FIFOQueue(
        capacity=config_input.queue_capacity,
        dtypes=[tf.float32] * (1 + len(self._config.targets_names) +
                               len(self._config.inputs_names)),
        shapes=self._queue_shapes)
    self._enqueue_op = self._queue.enqueue(
        [self._queue_image_input] + self._queue_targets + self._queue_inputs)
    self._dequeue_op = self._queue.dequeue()

    self.input_queue = mQueue(5)
    self.output_queue = mQueue(5)

    self.perception_interface = perception_interface

    if "mapping" in self._config.inputs_names:
        version = "v1"
        if hasattr(self._config, "mapping_version"):
            version = self._config.mapping_version
        # Use the default values: a 30-meter-wide view and a 50*75*1 output.
        self.mapping_helper = mapping_helper.mapping_helper(
            output_height_pix=self._config.map_height,
            version=version)
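# A hedged sketch of how a feeder like this is typically driven: a Python
# thread repeatedly fills the placeholders and runs the enqueue op, while the
# training graph consumes _dequeue_op. `next_batch()` is a hypothetical
# stand-in for this class's real batch-producing logic.
import threading

def _enqueue_loop(sess, feeder, next_batch):
    while True:
        image, targets, inputs = next_batch()
        feed = {feeder._queue_image_input: image}
        feed.update(dict(zip(feeder._queue_targets, targets)))
        feed.update(dict(zip(feeder._queue_inputs, inputs)))
        sess.run(feeder._enqueue_op, feed_dict=feed)

# t = threading.Thread(target=_enqueue_loop, args=(sess, feeder, next_batch))
# t.daemon = True
# t.start()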
def __init__(self, args, server, cluster, env, queue_shapes,
             trajectory_queue_size, replay_queue_size):
    self.env = env
    self.args = args
    self.task = args.task
    self.queue_shapes = queue_shapes
    self.trajectory_queue_size = trajectory_queue_size
    self.replay_queue_size = replay_queue_size

    self.action_sizes = env.action_sizes
    self.input_shape = list(self.env.observation_shape)

    worker_device = f"/job:worker/task:{self.task}" \
                    f"/{'gpu' if self.task < args.num_gpu else 'cpu'}:0"

    ###########################
    # Master policy (task=0)
    ###########################
    master_device = tf.train.replica_device_setter(
        1, worker_device=worker_device)
    with tf.device(master_device):
        self.prepare_master_network()

    ##################################
    # Queue pipelines (ps/task=0~)
    ##################################
    with tf.device('/job:ps/task:0'):
        # TODO: we may need more than 1 queue
        # for i in range(cluster.num_tasks('ps')):
        self.trajectory_queue = tf.FIFOQueue(
            self.trajectory_queue_size,
            [tf.float32] * len(self.queue_shapes),
            shapes=[shape for _, shape in self.queue_shapes],
            names=[name for name, _ in self.queue_shapes],
            shared_name='queue')
        self.trajectory_queue_size_op = self.trajectory_queue.size()

        self.replay_queue = tf.FIFOQueue(
            self.replay_queue_size,
            tf.float32,
            shapes=dict(self.queue_shapes)['states'],
            shared_name='replay')
        self.replay_queue_size_op = self.replay_queue.size()

    ###########################
    # Discriminator (task=1)
    ###########################
    if self.args.task == 0:
        policy_batch_size = self.args.policy_batch_size
        # XXX: may be needed if you are short on GPU memory
        # policy_batch_size = int(self.args.policy_batch_size
        #                         / self.env.episode_length)

        worker_device = f"/job:worker/task:{self.task}/cpu:0"
        with tf.device(worker_device):
            with tf.variable_scope("global"):
                self.dequeues = self.trajectory_queue.dequeue_many(
                    policy_batch_size)
    elif self.args.task == 1 and self.args.loss == 'gan':
        self.prepare_gan()
    #####################################################
    # Local policy network (task >= 2 (gan) or 1 (l2))
    #####################################################
    elif self.args.task >= 1:
        worker_device = f"/job:worker/task:{self.task}/cpu:0"
        with tf.device(worker_device):
            self.prepare_local_network()
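# When a FIFOQueue is built with `names=` as above, its enqueue and dequeue
# ops use dicts keyed by those names rather than positional lists. A small
# standalone sketch with made-up shapes:
import tensorflow as tf

queue_shapes = [('states', [4]), ('actions', [2])]
q = tf.FIFOQueue(10,
                 [tf.float32] * len(queue_shapes),
                 shapes=[shape for _, shape in queue_shapes],
                 names=[name for name, _ in queue_shapes])
enq = q.enqueue({'states': tf.zeros([4]), 'actions': tf.ones([2])})
deq = q.dequeue()  # a dict: {'states': <tensor>, 'actions': <tensor>}

with tf.Session() as sess:
    sess.run(enq)
    out = sess.run(deq)
    print(out['states'], out['actions'])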
def train():
    if not os.path.exists(args.checkpoint):
        os.makedirs(args.checkpoint)

    batch_shape = (args.batch_size, 256, 256, 3)

    with tf.Graph().as_default():
        tf.logging.set_verbosity(tf.logging.INFO)

        ### Setup data loading queue
        queue_input_content = tf.placeholder(tf.float32, shape=batch_shape)
        queue_input_val = tf.placeholder(tf.float32, shape=batch_shape)
        queue = tf.FIFOQueue(capacity=100,
                             dtypes=[tf.float32, tf.float32],
                             shapes=[[256, 256, 3], [256, 256, 3]])
        enqueue_op = queue.enqueue_many([queue_input_content, queue_input_val])
        dequeue_op = queue.dequeue()
        content_batch_op, val_batch_op = tf.train.batch(
            dequeue_op, batch_size=args.batch_size, capacity=100)

        def enqueue(sess):
            content_images = batch_gen(args.content_path, batch_shape)
            val_path = args.val_path if args.val_path is not None else args.content_path
            val_images = batch_gen(val_path, batch_shape)

            while True:
                content_batch = next(content_images)
                val_batch = next(val_images)
                sess.run(enqueue_op,
                         feed_dict={queue_input_content: content_batch,
                                    queue_input_val: val_batch})

        ### Build the model graph & train/summary ops, and get the EncoderDecoder
        model = WCTModel(mode='train',
                         relu_targets=[args.relu_target],
                         vgg_path=args.vgg_path,
                         batch_size=args.batch_size,
                         feature_weight=args.feature_weight,
                         pixel_weight=args.pixel_weight,
                         tv_weight=args.tv_weight,
                         learning_rate=args.learning_rate,
                         lr_decay=args.lr_decay).encoder_decoders[0]

        saver = tf.train.Saver(max_to_keep=args.max_to_keep)

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            enqueue_thread = threading.Thread(target=enqueue, args=[sess])
            enqueue_thread.daemon = True
            enqueue_thread.start()
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord, sess=sess)

            log_path = args.log_path if args.log_path is not None else os.path.join(
                args.checkpoint, 'log')
            summary_writer = tf.summary.FileWriter(log_path, sess.graph)

            sess.run(tf.global_variables_initializer())

            def load_latest():
                if os.path.exists(os.path.join(args.checkpoint, 'checkpoint')):
                    print("Restoring checkpoint")
                    saver.restore(sess,
                                  tf.train.latest_checkpoint(args.checkpoint))

            load_latest()

            for iteration in range(args.max_iter):
                start = time.time()

                content_batch = sess.run(content_batch_op)

                fetches = {
                    'train': model.train_op,
                    'global_step': model.global_step,
                    'lr': model.learning_rate,
                    'feature_loss': model.feature_loss,
                    'pixel_loss': model.pixel_loss,
                    'tv_loss': model.tv_loss
                }

                feed_dict = {model.content_input: content_batch}

                try:
                    results = sess.run(fetches, feed_dict=feed_dict)
                except Exception as e:
                    print(e)
                    print("Exception encountered, re-loading latest checkpoint")
                    load_latest()
                    continue

                ### Run a val batch and log the summaries
                if iteration % args.summary_iter == 0:
                    val_batch = sess.run(val_batch_op)
                    summary = sess.run(model.summary_op,
                                       feed_dict={model.content_input: val_batch})
                    summary_writer.add_summary(summary, results['global_step'])

                ### Save checkpoint
                if iteration % args.save_iter == 0:
                    save_path = saver.save(
                        sess, os.path.join(args.checkpoint, 'model.ckpt'),
                        results['global_step'])
                    print("Model saved in file: %s" % save_path)

                ### Log training stats
                print("Step: {} LR: {:.7f} Feature: {:.5f} Pixel: {:.5f} TV: {:.5f} Time: {:.5f}"
                      .format(results['global_step'], results['lr'],
                              results['feature_loss'], results['pixel_loss'],
                              results['tv_loss'], time.time() - start))

            # Last save
            save_path = saver.save(sess,
                                   os.path.join(args.checkpoint, 'model.ckpt'),
                                   results['global_step'])
            print("Model saved in file: %s" % save_path)
# -*- coding: utf-8 -*-
import tensorflow as tf

# Define the input data: 6 random values.
x_input_data = tf.random_normal([6], mean=-1, stddev=4)

# Define a queue with a capacity of 3.
q = tf.FIFOQueue(capacity=3, dtypes=tf.float32)

x_input_data = tf.Print(x_input_data, data=[x_input_data],
                        message="Raw inputs data generated:", summarize=6)

# Enqueue several values at once.
enqueue_op = q.enqueue_many(x_input_data)

numberOfThreads = 1
# Define the queue runner...
qr = tf.train.QueueRunner(q, [enqueue_op] * numberOfThreads)
# ...and add it to the queue-runner collection.
tf.train.add_queue_runner(qr)

input = q.dequeue()
input = tf.Print(input, data=[q.size(), input],
                 message="Nb element left, input:")

# Pretend we start training...
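# A hedged completion of the snippet above, assuming the "training" step just
# consumes `input` while the queue-runner threads keep the queue filled:
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)
    for i in range(10):
        sess.run(input)  # each run dequeues (and prints) one element
    coord.request_stop()
    coord.join(threads)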
import tensorflow as tf

queue = tf.FIFOQueue(100, "float")
enqueue_op = queue.enqueue([tf.random_normal([1])])
qr = tf.train.QueueRunner(queue, [enqueue_op] * 5)
tf.train.add_queue_runner(qr)
out_tensor = queue.dequeue()

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for _ in range(3):
        print(sess.run(out_tensor)[0])
    coord.request_stop()
    coord.join(threads)
# -*- coding: utf-8 -*-
import tensorflow as tf

"""
TensorFlow queues
"""

q = tf.FIFOQueue(2, "int32")        # define a FIFO queue
init = q.enqueue_many(([0, 10],))   # enqueue several elements
x = q.dequeue()                     # dequeue the first element
y = x + 1                           # add one
q_inc = q.enqueue([y])              # enqueue one element

with tf.Session() as sess:
    init.run()                      # run the initialization op
    for _ in range(5):
        v, _ = sess.run([x, q_inc])  # run the compute nodes
        print(v)
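# A related hedged sketch: closing a queue makes further dequeues raise
# tf.errors.OutOfRangeError once it drains, which is how queue-based input
# pipelines signal the end of data.
import tensorflow as tf

q = tf.FIFOQueue(2, "int32")
init = q.enqueue_many(([0, 10],))
close_op = q.close()
x = q.dequeue()

with tf.Session() as sess:
    init.run()
    sess.run(close_op)
    try:
        while True:
            print(sess.run(x))      # prints 0, then 10
    except tf.errors.OutOfRangeError:
        print('queue is closed and empty')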
def __init__(self, coordinator, metadata_filename, base_dir, hparams):
    super(Feeder, self).__init__()

    if hparams.gin_channels > 0:
        raise NotImplementedError(
            'Global conditioning preprocessing has not been added yet, '
            'it will be out soon. Thanks for your patience!')

    self._coord = coordinator
    self._hparams = hparams
    self._train_offset = 0
    self._test_offset = 0

    # Base directory of the project (to map files from different locations)
    self._base_dir = base_dir

    # Load metadata
    self._data_dir = os.path.dirname(metadata_filename)
    with open(metadata_filename, 'r') as f:
        self._metadata = [line.strip().split('|') for line in f]

    # Train/test split
    if hparams.wavenet_test_size is None:
        assert hparams.wavenet_test_batches is not None

    test_size = (hparams.wavenet_test_size
                 if hparams.wavenet_test_size is not None
                 else hparams.wavenet_test_batches * hparams.wavenet_batch_size)
    indices = np.arange(len(self._metadata))
    train_indices, test_indices = train_test_split(
        indices, test_size=test_size,
        random_state=hparams.wavenet_data_random_state)

    # Make sure the test size is a multiple of the batch size, else round
    # down and move the extra samples to the training set.
    len_test_indices = _round_down(len(test_indices),
                                   hparams.wavenet_batch_size)
    extra_test = test_indices[len_test_indices:]
    test_indices = test_indices[:len_test_indices]
    train_indices = np.concatenate([train_indices, extra_test])

    self._train_meta = list(np.array(self._metadata)[train_indices])
    self._test_meta = list(np.array(self._metadata)[test_indices])

    self.test_steps = len(self._test_meta) // hparams.wavenet_batch_size

    if hparams.wavenet_test_size is None:
        assert hparams.wavenet_test_batches == self.test_steps

    # Get conditioning status
    self.local_condition, self.global_condition = self._check_conditions()

    # Create placeholders for inputs and targets. Don't specify batch size
    # because we want to be able to feed different batch sizes at eval time.
    if is_scalar_input(hparams.input_type):
        input_placeholder = tf.placeholder(tf.float32,
                                           shape=(None, 1, None),
                                           name='audio_inputs')
        target_placeholder = tf.placeholder(tf.float32,
                                            shape=(None, None, 1),
                                            name='audio_targets')
        target_type = tf.float32
    else:
        input_placeholder = tf.placeholder(
            tf.float32,
            shape=(None, hparams.quantize_channels, None),
            name='audio_inputs')
        target_placeholder = tf.placeholder(tf.int32,
                                            shape=(None, None, 1),
                                            name='audio_targets')
        target_type = tf.int32

    self._placeholders = [
        input_placeholder,
        target_placeholder,
        tf.placeholder(tf.int32, shape=(None, ), name='input_lengths'),
    ]

    queue_types = [tf.float32, target_type, tf.int32]

    if self.local_condition:
        self._placeholders.append(
            tf.placeholder(tf.float32,
                           shape=(None, hparams.num_mels, None),
                           name='local_condition_features'))
        queue_types.append(tf.float32)
    if self.global_condition:
        self._placeholders.append(
            tf.placeholder(tf.int32, shape=(),
                           name='global_condition_features'))
        queue_types.append(tf.int32)

    # Create a queue for buffering training data.
    queue = tf.FIFOQueue(8, queue_types, name='input_queue')
    self._enqueue_op = queue.enqueue(self._placeholders)
    variables = queue.dequeue()

    self.inputs = variables[0]
    self.inputs.set_shape(self._placeholders[0].shape)
    self.targets = variables[1]
    self.targets.set_shape(self._placeholders[1].shape)
    self.input_lengths = variables[2]
    self.input_lengths.set_shape(self._placeholders[2].shape)

    # If local conditioning is disabled, override the c inputs with None.
    if hparams.cin_channels < 0:
        self.local_condition_features = None
    else:
        self.local_condition_features = variables[3]
        self.local_condition_features.set_shape(self._placeholders[3].shape)

    # If global conditioning is disabled, override the g inputs with None.
    if hparams.gin_channels < 0:
        self.global_condition_features = None
    else:
        self.global_condition_features = variables[4]
        self.global_condition_features.set_shape(self._placeholders[4].shape)

    # Create a queue for buffering eval data.
    eval_queue = tf.FIFOQueue(1, queue_types, name='eval_queue')
    self._eval_enqueue_op = eval_queue.enqueue(self._placeholders)
    eval_variables = eval_queue.dequeue()

    self.eval_inputs = eval_variables[0]
    self.eval_inputs.set_shape(self._placeholders[0].shape)
    self.eval_targets = eval_variables[1]
    self.eval_targets.set_shape(self._placeholders[1].shape)
    self.eval_input_lengths = eval_variables[2]
    self.eval_input_lengths.set_shape(self._placeholders[2].shape)

    # If local conditioning is disabled, override the c inputs with None.
    if hparams.cin_channels < 0:
        self.eval_local_condition_features = None
    else:
        self.eval_local_condition_features = eval_variables[3]
        self.eval_local_condition_features.set_shape(
            self._placeholders[3].shape)

    # If global conditioning is disabled, override the g inputs with None.
    if hparams.gin_channels < 0:
        self.eval_global_condition_features = None
    else:
        self.eval_global_condition_features = eval_variables[4]
        self.eval_global_condition_features.set_shape(
            self._placeholders[4].shape)
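# A hedged sketch of how a feeder's enqueue op is typically driven by a
# background thread under the coordinator, mirroring the pattern above.
# `_get_next_example()` is a hypothetical stand-in for the class's real
# batch-assembly logic.
import threading

def start_threads(self, session):
    thread = threading.Thread(name='background',
                              target=self._enqueue_next_group,
                              args=(session,))
    thread.daemon = True  # let the thread die with the main process
    thread.start()

def _enqueue_next_group(self, session):
    while not self._coord.should_stop():
        feed_dict = dict(zip(self._placeholders, self._get_next_example()))
        session.run(self._enqueue_op, feed_dict=feed_dict)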