def read_csv_examples(image_dir, label_dir, batch_size=100, num_epochs=None, task_index=None, num_workers=None):
    print_log(worker_num, "num_epochs: {0}".format(num_epochs))

    # Setup queue of csv image filenames
    tf_record_pattern = os.path.join(image_dir, 'part-*')
    images = tf.gfile.Glob(tf_record_pattern)
    print_log(worker_num, "images: {0}".format(images))
    image_queue = tf.train.string_input_producer(images, shuffle=False, capacity=1000,
                                                 num_epochs=num_epochs, name="image_queue")

    # Setup queue of csv label filenames
    tf_record_pattern = os.path.join(label_dir, 'part-*')
    labels = tf.gfile.Glob(tf_record_pattern)
    print_log(worker_num, "labels: {0}".format(labels))
    label_queue = tf.train.string_input_producer(labels, shuffle=False, capacity=1000,
                                                 num_epochs=num_epochs, name="label_queue")

    # Setup reader for image queue
    img_reader = tf.TextLineReader(name="img_reader")
    _, img_csv = img_reader.read(image_queue)
    image_defaults = [[1.0] for col in range(784)]
    img = tf.pack(tf.decode_csv(img_csv, image_defaults))

    # Normalize values to [0,1]
    norm = tf.constant(255, dtype=tf.float32, shape=(784,))
    image = tf.div(img, norm)
    print_log(worker_num, "image: {0}".format(image))

    # Setup reader for label queue
    label_reader = tf.TextLineReader(name="label_reader")
    _, label_csv = label_reader.read(label_queue)
    label_defaults = [[1.0] for col in range(10)]
    label = tf.pack(tf.decode_csv(label_csv, label_defaults))
    print_log(worker_num, "label: {0}".format(label))

    # Return a batch of examples
    return tf.train.batch([image, label], batch_size, num_threads=args.readers, name="batch_csv")
def read_fer2013(eval_data):
    """Read and parse the examples from the FER2013 data file.

    Args:
      eval_data: boolean indicating whether we are using training or evaluation data

    Returns:
      A single example contained in an object with fields:
        height: number of rows
        width: number of columns
        depth: number of colour channels
        key: filename and record number for the example
        label: an int32 Tensor with the label in the range 0..6
        image: a [height, width, depth] int32 Tensor with the image data
    """
    class FER2013Record(object):
        pass
    result = FER2013Record()

    # Dataset dimensions
    result.height = 48
    result.width = 48
    result.depth = 1

    # Set up the reader
    filename = tf.train.string_input_producer(["FER2013 data/fer2013/fer2013.csv"])

    # Read from the data file:
    # training data starts on line 2 (single header line);
    # test data starts after the training data.
    skip_lines = 1
    if eval_data:
        skip_lines = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN
    reader = tf.TextLineReader(skip_header_lines=skip_lines)

    # Read a line corresponding to an example
    result.key, value = reader.read(filename)

    # Decode the line according to its formatting
    def1 = [[0], ["Empty"], ["Empty"]]
    result.label, image, result.testOrTrain = tf.decode_csv(value, def1)

    # The middle column corresponds to the image data of 48x48 = 2304.
    # The data is space separated, hence field_delim=' '.
    def2 = [[0]] * (result.height * result.width)
    image = tf.decode_csv(image, def2, field_delim=' ')
    image = tf.reshape(image, [result.height, result.width, -1])
    result.image = tf.cast(image, tf.uint8)

    return result
def test_inputs(self, csv, batch_size):
    print("input csv file path: %s, batch size: %d" % (csv, batch_size))
    filename_queue = tf.train.string_input_producer([csv], shuffle=False)
    reader = tf.TextLineReader()
    _, serialized_example = reader.read(filename_queue)
    filename, label = tf.decode_csv(serialized_example, [["path"], [0]])
    label = tf.cast(label, tf.int32)
    jpg = tf.read_file(filename)
    image = tf.image.decode_jpeg(jpg, channels=3)
    image = tf.cast(image, tf.float32)
    print "original image shape:"
    print image.get_shape()

    # resize to distort
    dist = tf.image.resize_images(image, FLAGS.scale_h, FLAGS.scale_w)
    # random crop
    dist = tf.image.resize_image_with_crop_or_pad(dist, FLAGS.input_h, FLAGS.input_w)

    min_fraction_of_examples_in_queue = 0.4
    min_queue_examples = int(FLAGS.num_examples_per_epoch_for_train * min_fraction_of_examples_in_queue)
    print('filling queue with %d train images before starting to train. '
          'This will take a few minutes.' % min_queue_examples)

    return self._generate_image_and_label_batch(dist, label, min_queue_examples, batch_size, shuffle=False)
def read(filename_queue):
    class Record(object):
        pass
    result = Record()

    reader = tf.TextLineReader()
    result.key, line = reader.read(filename_queue)
    #sess = tf.Session()
    #print(line[0].eval(session=sess), line[1].eval(session=sess))
    #sess.close()
    #print(line.get_shape())
    record_defaults = [[0] for _ in xrange(2305)]
    columns = tf.decode_csv(line, record_defaults=record_defaults)
    #print("PRINT: ", len(columns))

    x = tf.pack(columns[1:])
    cls = columns[0]

    result.height = 48
    result.width = 48
    result.label = tf.cast(cls, tf.int32)
    depth_major = tf.reshape(x, [result.height, result.width, 1])
    three_chann = tf.concat(2, [depth_major, depth_major, depth_major])
    print(three_chann.get_shape())
    result.image = three_chann
    return result
def _input_fn():
    num_epochs = 100 if mode == tf.contrib.learn.ModeKeys.TRAIN else 1

    # could be a path to one file or a file pattern.
    input_file_names = tf.train.match_filenames_once(filename)
    filename_queue = tf.train.string_input_producer(
        input_file_names, num_epochs=num_epochs, shuffle=True)

    reader = tf.TextLineReader()
    _, value = reader.read_up_to(filename_queue, num_records=BATCH_SIZE)
    value_column = tf.expand_dims(value, -1)
    print 'readcsv={}'.format(value_column)

    # all_data is a list of tensors
    all_data = tf.decode_csv(value_column, record_defaults=DEFAULTS)
    inputs = all_data[:len(all_data) - N_OUTPUTS]   # first few values
    label = all_data[len(all_data) - N_OUTPUTS:]    # last few values

    # from list of tensors to tensor with one more dimension
    inputs = tf.concat(inputs, axis=1)
    label = tf.concat(label, axis=1)
    print 'inputs={}'.format(inputs)

    return {TIMESERIES_COL: inputs}, label  # dict of features, label
def multi_reader_multi_example():
    # create a FIFO queue
    filenames = ['a.csv', 'b.csv', 'c.csv']
    filename_queue = tf.train.string_input_producer(filenames, shuffle=False)

    # create reader
    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)

    record_defaults = [['null'], ['null']]
    example_list = [tf.decode_csv(value, record_defaults=record_defaults)
                    for _ in range(2)]
    example_batch, label_batch = tf.train.batch_join(example_list, batch_size=5)

    # run graph
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            while not coord.should_stop():
                print(example_batch.eval())
        except tf.errors.OutOfRangeError:
            print('epochs completed!')
        finally:
            coord.request_stop()
            coord.join(threads)
def read_image_unlabeled(filename_queue, raw_img):
    class StatefarmRecord(object):
        pass
    result = StatefarmRecord()

    # Read a record, getting filenames from the filename_queue.
    result.key, _ = tf.decode_csv(filename_queue.dequeue(), [[""], [""]], " ")

    # Extract raw JPG data as a string
    # raw_contents = tf.read_file(result.key)
    # raw_contents = raw_img

    # Decode raw data as a PNG. Defaults to uint8 encoding.
    # result.uint8image = tf.image.decode_png(raw_contents)
    result.uint8image = raw_img.astype('uint8')

    # TENSORFLOW BUG: image shape not statically determined, so force
    # it to have correct CIFAR100 dimensions
    # result.uint8image.set_shape((32, 32, 3))

    # Kind of hacky, but set a label so we can use the same structure.
    # THIS SHOULD ALWAYS BE IGNORED DURING COMPUTATION, since we are
    # dealing with unlabeled data.
    result.label = tf.cast(tf.string_to_number("0"), tf.int32)

    return result
def parse_csv(value):
    tf.logging.info('Parsing {}'.format(data_file))
    columns = tf.decode_csv(value, record_defaults=_CSV_COLUMN_DEFAULTS)
    features = dict(zip(_CSV_COLUMNS, columns))
    labels = features.pop('income_bracket')
    classes = tf.equal(labels, '>50K')  # binary classification
    return features, classes
def decode_csv(line):
    parsed_line = tf.decode_csv(line, [[0.], [0.], [0.], [0.], [0]])
    label = parsed_line[-1:]  # Last element is the label
    del parsed_line[-1]       # Delete last element
    features = parsed_line    # Everything but last elements are the features
    d = dict(zip(feature_names, features)), label
    return d
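# A minimal sketch (not from the original code) of how a per-line parser like the
# decode_csv function above is typically wired into a tf.data input pipeline.
# The file path, feature_names, and batch size here are hypothetical placeholders.
import tensorflow as tf

feature_names = ['f1', 'f2', 'f3', 'f4']  # assumed column names, for illustration only

def _example_input_fn():
    dataset = tf.data.TextLineDataset('train.csv')  # one CSV line per element
    dataset = dataset.map(decode_csv)               # parse each line into (features dict, label)
    dataset = dataset.shuffle(1000).repeat().batch(32)
    return dataset.make_one_shot_iterator().get_next()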
def filequeue_to_batch_data(filename_queue, line_reader, batch_size = BATCH_SIZE): # The text file format should be Query Image, Trieve Image, Query label, # Trieve Label, Triplet loss Label( 0/1 ) key, next_line = line_reader.read(filename_queue) query_image_name, retrieve_image_name, label_1, label_2, label_3 = tf.decode_csv( next_line, [tf.constant([], dtype=tf.string),tf.constant([], dtype=tf.string), tf.constant([], dtype = tf.int32), tf.constant([], dtype = tf.int32), tf.constant([], dtype = tf.int32)], field_delim=" ") # batch_query_image, batch_label = tf.train.batch( # [query_image_name, label], batch_size=batch_size) reverse_channel = True # for pre-trained purpose query_tensor = image_io.read_image(query_image_name, reverse_channel, FEATURE_ROW, FEATURE_COL) retrieve_tensor = image_io.read_image(retrieve_image_name, reverse_channel, FEATURE_ROW, FEATURE_COL) if SHUFFLE_DATA: min_after_dequeue = 100 capacity = min_after_dequeue + 3 * batch_size batch_query_tensor, batch_retrieve_tensor, batch_label_1, batch_label_2, batch_label_3 = tf.train.shuffle_batch( [query_tensor, retrieve_tensor, label_1, label_2, label_3], batch_size = batch_size, capacity=capacity, min_after_dequeue=min_after_dequeue) else: batch_query_tensor,batch_retrieve_tensor, batch_label_1, batch_label_2, batch_label_3 = tf.train.batch( [query_tensor, retrieve_tensor, label_1, label_2, label_3], batch_size=batch_size) batch_tensor = tf.concat(0, [batch_query_tensor, batch_retrieve_tensor]) batch_label = tf.concat(0, [batch_label_1, batch_label_2]) return batch_tensor, batch_label, batch_label_3
def record_to_labeled_log_mel_examples(csv_record, clip_dir=None, hparams=None,
                                       label_class_index_table=None, num_classes=None):
    """Creates a batch of log mel spectrum examples from a training record.

    Args:
      csv_record: a line from the train.csv file downloaded from Kaggle.
      clip_dir: path to a directory containing clips referenced by csv_record.
      hparams: tf.contrib.training.HParams object containing model hyperparameters.
      label_class_index_table: a lookup table that represents the class map.
      num_classes: number of classes in the class map.

    Returns:
      features: Tensor containing a batch of log mel spectrum examples.
      labels: Tensor containing corresponding labels in 1-hot format.
    """
    [clip, label, _] = tf.decode_csv(csv_record, record_defaults=[[''], [''], [0]])

    features = clip_to_log_mel_examples(clip, clip_dir=clip_dir, hparams=hparams)

    class_index = label_class_index_table.lookup(label)
    label_onehot = tf.one_hot(class_index, num_classes)
    num_examples = tf.shape(features)[0]
    labels = tf.tile([label_onehot], [num_examples, 1])

    return features, labels
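# A hedged sketch of how the label_class_index_table argument above might be
# constructed: a static lookup table mapping label strings to class indices.
# The class names are invented placeholders; the real class map comes from the
# competition metadata, and the contrib lookup call shown is an assumption.
import tensorflow as tf

class_names = ['Acoustic_guitar', 'Applause', 'Bark']  # hypothetical labels
label_class_index_table = tf.contrib.lookup.index_table_from_tensor(
    mapping=tf.constant(class_names), default_value=-1)
num_classes = len(class_names)
# The table must be initialized before use, e.g. sess.run(tf.tables_initializer()).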
def parse_csv(line): print("Parsing", data_file) # tf.decode_csv会把csv文件转换成很a list of Tensor,一列一个。record_defaults用于指明每一列的缺失值用什么填充 columns = tf.decode_csv(line, record_defaults=_CSV_COLUMN_DEFAULTS) features = dict(zip(_CSV_COLUMNS, columns)) labels = features.pop('income_bracket') return features, tf.equal(labels, '>50K') # tf.equal(x, y) 返回一个bool类型Tensor, 表示x == y, element-wise
def read_pascifar(pascifar_path, queue):
    """Reads and parses files from the queue.

    Args:
        pascifar_path: a constant string tensor representing the path of the PASCIFAR dataset
        queue: A queue of strings in the format: file, label

    Returns:
        image_path: a tf.string tensor. The absolute path of the image in the dataset
        label: a int64 tensor with the label
    """
    # Reader for text lines
    reader = tf.TextLineReader(skip_header_lines=1)

    # read a record from the queue
    _, row = reader.read(queue)

    # file,width,height,label
    record_defaults = [[""], [0]]

    image_path, label = tf.decode_csv(row, record_defaults, field_delim=",")

    image_path = pascifar_path + tf.constant("/") + image_path
    label = tf.cast(label, tf.int64)
    return image_path, label
def filequeue_to_batch_data(filename_queue, line_reader, batch_size=BATCH_SIZE):
    key, next_line = line_reader.read(filename_queue)
    query_image_name, label = tf.decode_csv(
        next_line,
        [tf.constant([], dtype=tf.string), tf.constant([], dtype=tf.int32)],
        field_delim=" ")
    # batch_query_image, batch_label = tf.train.batch(
    #     [query_image_name, label], batch_size=batch_size)

    reverse_channel = True  # for pre-trained purpose
    query_tensor = image_io.read_image(query_image_name, reverse_channel,
                                       FEATURE_ROW, FEATURE_COL)

    if SHUFFLE_DATA:
        min_after_dequeue = 100
        capacity = min_after_dequeue + 3 * batch_size
        batch_query_image, batch_label = tf.train.shuffle_batch(
            [query_tensor, label], batch_size=batch_size,
            capacity=capacity, min_after_dequeue=min_after_dequeue)
    else:
        batch_query_image, batch_label = tf.train.batch(
            [query_tensor, label], batch_size=batch_size)

    return batch_query_image, batch_label
def read_tensors_from_csv(file_name, defaults=None, num_columns=None, batch_size=1,
                          num_epochs=None, delimiter=',', randomize_input=True,
                          num_threads=4):
    if file_name is None:
        raise ValueError("Invalid file_name. file_name cannot be empty.")

    if defaults is None and num_columns is None:
        raise ValueError("At least one of defaults and num_columns should not be None.")

    if defaults is None:
        defaults = [0.0 for _ in range(num_columns)]

    record_defaults = [[item] for item in defaults]

    examples = tf.contrib.learn.read_batch_examples(
        file_pattern=file_name,
        batch_size=batch_size,
        reader=tf.TextLineReader,
        randomize_input=randomize_input,
        num_threads=num_threads,
        num_epochs=num_epochs)

    columns = tf.decode_csv(examples, record_defaults=record_defaults, field_delim=delimiter)

    return columns
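# A possible call to the helper above, assuming a CSV with three numeric columns.
# The path, defaults, and batch size are illustrative, not taken from the original code.
cols = read_tensors_from_csv('data/train.csv',
                             defaults=[0.0, 0.0, 0.0],
                             batch_size=32,
                             num_epochs=1,
                             randomize_input=False)
# `cols` is a list of three batched column tensors, each of shape [32].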
def build_csv_serving_tensors_for_transform_step(analysis_path, features, schema,
                                                 stats, keep_target):
    """Builds a serving function starting from raw csv.

    This should only be used by transform.py (the transform step).

    For image columns, the image should be a base64 string encoding the image.
    The output of this function will transform that image to a 2048 long vector
    using the inception model.
    """
    csv_header, record_defaults = csv_header_and_defaults(features, schema, stats, keep_target)

    placeholder = tf.placeholder(dtype=tf.string, shape=(None,),
                                 name='csv_input_placeholder')
    tensors = tf.decode_csv(placeholder, record_defaults)
    raw_features = dict(zip(csv_header, tensors))

    transform_fn = make_preprocessing_fn(analysis_path, features, keep_target)
    transformed_tensors = transform_fn(raw_features)

    transformed_features = {}
    # Expand the dims of non-sparse tensors
    for k, v in six.iteritems(transformed_tensors):
        if isinstance(v, tf.Tensor) and v.get_shape().ndims == 1:
            transformed_features[k] = tf.expand_dims(v, -1)
        else:
            transformed_features[k] = v

    return input_fn_utils.InputFnOps(
        transformed_features, None, {"csv_example": placeholder})
def _decode_csv(line):
    """Takes the string input tensor and parses it to feature dict and target.

    All the columns except the first one are treated as feature columns. The
    first column is expected to be the target. Only returns the target if
    with_target is True.

    Args:
      line: csv rows in tensor format.

    Returns:
      features: A dictionary of features with key as "column_names" from
        self._column_header.
      target: tensor of target values which is the first column of the file.
        This will only be returned if with_target==True.
    """
    column_header = column_names if with_target else column_names[:4]
    record_defaults = [[0.] for _ in xrange(len(column_names) - 1)]
    # Pass label as integer.
    if with_target:
        record_defaults.append([0])
    columns = tf.decode_csv(line, record_defaults=record_defaults)
    features = dict(zip(column_header, columns))
    target = features.pop(column_names[4]) if with_target else None
    return features, target
def decode_csv(line):
    parsed_line = tf.decode_csv(line, [[0.], [0.], [0.], [0.], [0]])
    label = parsed_line[-1:]
    del parsed_line[-1]
    features = parsed_line
    d = dict(zip(feature_names, features)), label
    return d
def _input_fn():
    BATCH_SIZE = 40
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TextLineReader()
    key, value = reader.read_up_to(filename_queue, num_records=BATCH_SIZE)

    record_defaults = [[0], [" "], [0], [" "], [0],
                       [" "], [" "], [" "], [" "], [" "],
                       [0], [0], [0], [" "], [" "]]
    columns = tf.decode_csv(value, record_defaults=record_defaults)
    features = dict(zip(COLUMNS, columns))

    # save our label
    income_bracket = features.pop('income_bracket')

    # remove the fnlwgt key, which is not used
    features.pop('fnlwgt', 'fnlwgt key not found')

    # works in 0.12 only
    for feature_name in CATEGORICAL_COLUMNS:
        features[feature_name] = tf.expand_dims(features[feature_name], -1)

    income_int = tf.to_int32(tf.equal(income_bracket, " >50K"))

    return features, income_int
def parse_record(record):
    columns = tf.decode_csv(record, record_defaults=commons.HEADER_DEFAULTS, field_delim='\t')
    features = columns[0]
    target = columns[1:]

    target = tf.cast(tf.string_to_number(target), dtype=tf.int32)
    target = tf.stack(target, axis=0)

    return {commons.FEATURE_COL: features}, target
def get_input(input_file, batch_size, im_size=224): input = DATA_DIR + 'SegNet/SiftFlow/' + input_file filenames = [] with open(input, 'r') as f: for line in f: filenames.append('{}/{}'.format( DATA_DIR, line.strip())) # filenames.append('{}/{}.jpg {}'.format( # DATA_DIR, line.strip(), # line.strip())) filename_queue = tf.train.string_input_producer(filenames) filename, label_dir = tf.decode_csv(filename_queue.dequeue(), [[""], [""]], " ") label = label_dir; file_contents = tf.read_file(filename) im = tf.image.decode_jpeg(file_contents) im = tf.image.resize_images(im, im_size, im_size, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR) im = tf.reshape(im, [im_size, im_size, 3]) im = tf.to_float(im) im_mean = tf.constant([122.67892, 116.66877, 104.00699], dtype=tf.float32) im = tf.sub(im, im_mean) # im = tf.image.per_image_whitening(im) # im = tf.image.per_image_whitening(im) min_queue_examples = int(10000 * 0.4) example_batch, lbl_batch = tf.train.batch([im, label], num_threads=1, batch_size=batch_size, capacity=min_queue_examples + 3 * batch_size) return example_batch, lbl_batch
def read(filename_queue):
    value = filename_queue.dequeue()
    fpath, label = tf.decode_csv(
        value, record_defaults=[[''], ['']], field_delim=' ')
    image_buffer = tf.read_file(fpath)
    return [image_buffer, label]
def _decode(example_batch): """Decode a batch of CSV lines into a feature map.""" if FLAGS.is_predicting: record_defaults = [[0.0], [""], [0.0], [""], [0.0], [""], [""], [""], [""], [""], [0.0], [0.0], [0.0], [""]] else: record_defaults = [[0.0], [""], [0.0], [""], [0.0], [""], [""], [""], [""], [""], [0.0], [0.0], [0.0], [""], [""]] fields = tf.decode_csv(example_batch, record_defaults, field_delim=',') if FLAGS.is_predicting: data = {LABEL: tf.constant("")} else: data = {LABEL: fields[14]} data["age"] = fields[0] data["workclass"] = fields[1] data["fnlwgt"] = fields[2] data["education"] = fields[3] data["education-num"] = fields[4] data["marital-status"] = fields[5] data["occupation"] = fields[6] data["relationship"] = fields[7] data["race"] = fields[8] data["sex"] = fields[9] data["capital-gain"] = fields[10] data["capital-loss"] = fields[11] data["hours-per-week"] = fields[12] data["native-country"] = fields[13] return data
def parse_csv(csv_row, is_serving=False):
    """Takes the string input tensor (csv) and returns a dict of rank-2 tensors.

    Takes a rank-1 tensor and converts it into rank-2 tensor, with respect to
    its data type (inferred from the metadata).

    Args:
        csv_row: rank-2 tensor of type string (csv).
        is_serving: boolean to indicate whether this function is called during
            serving or training, since the csv_row serving input is different
            than the training input (i.e., no target column).

    Returns:
        rank-2 tensor of the correct data type.
    """
    if is_serving:
        column_names = metadata.SERVING_COLUMN_NAMES
        defaults = []
        # create the defaults for the serving columns.
        for serving_feature in metadata.SERVING_COLUMN_NAMES:
            feature_index = metadata.COLUMN_NAMES.index(serving_feature)
            defaults.append(metadata.DEFAULTS[feature_index])
    else:
        column_names = metadata.COLUMN_NAMES
        defaults = metadata.DEFAULTS

    columns = tf.decode_csv(csv_row, record_defaults=defaults)
    features = dict(zip(column_names, columns))

    return features
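# A hedged sketch of how parse_csv above could back a serving input function for a
# tf.estimator model. The function name and receiver tensor key are assumptions for
# illustration and are not part of the original module.
import tensorflow as tf

def csv_serving_input_fn():
    csv_row = tf.placeholder(dtype=tf.string, shape=[None], name='csv_row')
    features = parse_csv(csv_row, is_serving=True)
    return tf.estimator.export.ServingInputReceiver(
        features=features, receiver_tensors={'csv_row': csv_row})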
def raw_training_input_fn(): """Training input function that reads raw data and applies transforms.""" if isinstance(raw_data_file_pattern, six.string_types): filepath_list = [raw_data_file_pattern] else: filepath_list = raw_data_file_pattern files = [] for path in filepath_list: files.extend(file_io.get_matching_files(path)) filename_queue = tf.train.string_input_producer( files, num_epochs=num_epochs, shuffle=randomize_input) csv_id, csv_lines = tf.TextLineReader().read_up_to(filename_queue, training_batch_size) queue_capacity = (reader_num_threads + 3) * training_batch_size + min_after_dequeue if randomize_input: _, batch_csv_lines = tf.train.shuffle_batch( tensors=[csv_id, csv_lines], batch_size=training_batch_size, capacity=queue_capacity, min_after_dequeue=min_after_dequeue, enqueue_many=True, num_threads=reader_num_threads, allow_smaller_final_batch=allow_smaller_final_batch) else: _, batch_csv_lines = tf.train.batch( tensors=[csv_id, csv_lines], batch_size=training_batch_size, capacity=queue_capacity, enqueue_many=True, num_threads=reader_num_threads, allow_smaller_final_batch=allow_smaller_final_batch) csv_header, record_defaults = csv_header_and_defaults(features, schema, stats, keep_target=True) parsed_tensors = tf.decode_csv(batch_csv_lines, record_defaults, name='csv_to_tensors') raw_features = dict(zip(csv_header, parsed_tensors)) transform_fn = make_preprocessing_fn(analysis_output_dir, features, keep_target=True) transformed_tensors = transform_fn(raw_features) # Expand the dims of non-sparse tensors. This is needed by tf.learn. transformed_features = {} for k, v in six.iteritems(transformed_tensors): if isinstance(v, tf.Tensor) and v.get_shape().ndims == 1: transformed_features[k] = tf.expand_dims(v, -1) else: transformed_features[k] = v # Remove the target tensor, and return it directly target_name = get_target_name(features) if not target_name or target_name not in transformed_features: raise ValueError('Cannot find target transform in features') transformed_target = transformed_features.pop(target_name) return transformed_features, transformed_target
def parse_example_tensor(examples, train_config, keep_target):
    """Read the csv files.

    Args:
        examples: string tensor
        train_config: training config
        keep_target: if true, the target column is expected to exist and it is
            returned in the features dict.

    Returns:
        Dict of feature_name to tensor. Target feature is in the dict.
    """
    csv_header = []
    if keep_target:
        csv_header = train_config['csv_header']
    else:
        csv_header = [name for name in train_config['csv_header']
                      if name != train_config['target_column']]

    # record_defaults are used by tf.decode_csv to insert defaults, and to infer
    # the datatype.
    record_defaults = [[train_config['csv_defaults'][name]]
                       for name in csv_header]
    tensors = tf.decode_csv(examples, record_defaults, name='csv_to_tensors')

    # I'm not really sure why expand_dims needs to be called. If using regression
    # models, it errors without it.
    tensors = [tf.expand_dims(x, axis=1) for x in tensors]

    tensor_dict = dict(zip(csv_header, tensors))
    return tensor_dict
def batch_producer(filepath, n_classes, **kwargs): """Function for loading batches of images and and labels from a csv *without* a header. CSV files must be in the format of class_code,/abs/path/to/img class_code,/abs/path/to/img class_code,/abs/path/to/img Parameters ----------- filepath : list list of paths to csv files. Even if just using one file, it must be a list. For example ['/path/to/file.csv'] n_classes : int number of classes to be used in one-hot encoding batch_size : (kwarg) int number of samples per batch. Default is 4 epochs : (kwarg) int number of epochs to run. Default is 70 img_shape : (kwarg) tuple shape of the image. Must be in the form of (H,W,C). Image will *not* be resized, the value is used for setting the shape for the batch queue. Default is (224, 224, 3) is_training : (kwarg) bool when set to true, the loader will apply image transformations. Default is True num_threads : (kwarg) int number of threads to use for the loader. Default is 4 """ batch_size = kwargs.pop("batch_size", 4) img_shape = kwargs.pop("image_shape", (224, 224, 3)) num_threads = kwargs.pop("num_threads", 4) epochs = kwargs.pop("epochs", 70) is_training = kwargs.pop("is_trianing", True) # loads a series of text files filename_queue = tf.train.string_input_producer(filepath, num_epochs=epochs) # used to read each text file line by line reader = tf.TextLineReader() # actually parse the text file. returns idx, content _, record = reader.read(filename_queue) # split out the csv. Defaults to returning strings. img_class, fname = tf.decode_csv(record, record_defaults=[[1], [""]]) img_content = read_one_image(fname, is_training=is_training, image_shape=img_shape) # load batches of images all multithreaded like class_batch, img_batch = tf.train.shuffle_batch([img_class, img_content], batch_size=batch_size, capacity=batch_size * 4, num_threads=num_threads, min_after_dequeue=batch_size * 2) one_hot_classes = tf.one_hot(class_batch, depth=n_classes, on_value=1.0, off_value=0.0) return one_hot_classes, img_batch
def read_data(filename_queue):
    reader = tf.TextLineReader()
    key, record_string = reader.read(filename_queue)
    record_defaults = [[1], [1], [1]]
    col1, col2, col3 = tf.decode_csv(record_string, record_defaults=record_defaults)
    features = tf.pack([col1, col2])
    label = col3
    return features, label
def read_mnist_csv(filename_queue):
    reader = tf.TextLineReader(skip_header_lines=1)
    key, value = reader.read(filename_queue)
    record_defaults = [[0] for row in range(785)]
    cols = tf.decode_csv(value, record_defaults=record_defaults)
    features = tf.stack(cols[1:])
    label = tf.stack([cols[0]])
    return features, label
def read_from_csv(filename_queue):
    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)
    record_defaults = [[""], [0]]
    image_path, label = tf.decode_csv(value, field_delim=" ", record_defaults=record_defaults)
    print("imagepath is: ", image_path)
    image = tf.image.decode_jpeg(tf.read_file(image_path), channels=3)
    return image, label
gpu_options=tf.GPUOptions(allow_growth=True, visible_device_list='1'))

start_time = time.time()
print("start time : " + str(start_time))

with tf.name_scope('LoadImage'):
    csv_name = a.csv_name
    #csv_name = "/home/zhaoyin-t/plant_disease/traindata_int_small_random.csv"  # with synthesized data
    #csv_name = "/home/zhaoyin-t/plant_disease/traindata_seg_int.csv"  # segmentation
    filename_queue = tf.train.string_input_producer([csv_name], shuffle=True)
    reader = tf.TextLineReader()
    _, val = reader.read(filename_queue)
    record_defaults = [["a"], ["a"], [0]]
    #record_defaults = [["a"], [0], [0], [0]]
    #path, _, label = tf.decode_csv(val, record_defaults=record_defaults)
    path, _, label = tf.decode_csv(val, record_defaults=record_defaults)
    readfile = tf.read_file(path)
    image = tf.image.decode_jpeg(readfile, channels=3)
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    image = tf.cast(image, dtype=tf.float32)
    height, width, ch = image.get_shape()

    # transform params
    CROP_SIZE = 256
    SCALE_SIZE = 286
    rot90_times = tf.random_uniform([1], 0, 5, dtype=tf.int32)[0]
    crop_offset = tf.cast(tf.floor(
        tf.random_uniform([2], 0, SCALE_SIZE - CROP_SIZE + 1, seed=seed)), dtype=tf.int32)

    def transform(img,
csv_file_names = Get_files_path() # 1) create a FIFO queue # filename_queue = tf.train.string_input_producer(csv_file_names) print('filename_queue: ', filename_queue) # 1.1) Reader reader = tf.TextLineReader() print('reader: ', reader) key, value = reader.read(filename_queue) # 1.2) Parse line record_defaults = [[] for i in range(785) ] # notice : tf.decode_csv return like this format-type # print('record_defaults: ', record_defaults) parse_record_op = tf.decode_csv(value, record_defaults=record_defaults, field_delim=',') # return a list # print('parse_record: ', len(parse_record_op)) #feature = parse_record[1:] #label = parse_record[0] # 2) Paras define random_par = tf.Variable( tf.random_normal(shape=(2, 3), mean=0, stddev=1.0, dtype=tf.float32)) zeros_par = tf.Variable(tf.zeros(shape=(2, 3), dtype=tf.float32)) init_op = tf.global_variables_initializer() # 3) Session run gpu_options = tf.GPUOptions(allow_growth=True) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
def parse(line):
    fields = tf.decode_csv(line, [[0.0], [0.0]], field_delim="\t")
    return {"x": fields[0]}, fields[1]
def parse_csv(value):
    print('Parsing', data_file)
    columns = tf.decode_csv(value, record_defaults=_CSV_COLUMN_DEFAULTS)
    features = dict(zip(_CSV_COLUMNS, columns))
    labels = features.pop('income_bracket')
    return features, tf.equal(labels, '>50K')
    captcha_dir = captcha_dir + "labels.csv"

    # Construct file queue
    file_queue = tf.train.string_input_producer([captcha_dir], shuffle=False)

    reader = tf.TextLineReader()

    # Read the label data content of the csv
    key, value = reader.read(file_queue)

    # Decode csv data.
    # records specifies the per-column format and data type: the 1 in [1] sets the
    # default value and the dtype. If a column holds decimals (float), use [1.0] instead.
    records = [[1], ["None"]]
    number, label = tf.decode_csv(value, record_defaults=records)

    # Batch data
    label_batch = tf.train.batch([label], batch_size=6000, num_threads=1, capacity=6000)

    return label_batch


def dealwuthlabel(label_str):
    """
    :param label_str:
    :return:
    """
    # Type of verification code string
    letter = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
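# A small illustration of how record_defaults drives the dtypes returned by
# tf.decode_csv, as the comment above describes. The literal line below is made up.
num_col, str_col = tf.decode_csv("7,cat", record_defaults=[[1], ["None"]])
# num_col is an int32 scalar tensor (default 1); str_col is a string scalar tensor.
# Using [1.0] as the default would make that column float32 instead.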
my_matrix = np.loadtxt(open("../data/d_train_20180102.csv", "rb"),
                       dtype=data_tpye, delimiter=",", skiprows=0)
filename_queue = tf.train.string_input_producer([
    "../data/d_train_20180102.csv",
])
# one line at a time
print(my_matrix[0][41])
reader = tf.TextLineReader(skip_header_lines=1)
key, value = reader.read(queue=filename_queue)
key, value = reader.read(filename_queue)
# Parse one line at a time; fields are separated by commas by default
record_defaults = list(data_tpye1)
col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13, col14, \
    col15, col16, col17, col18, col19, col20, col21, col22, col23, col24, col25, col26, \
    col27, col28, col29, col30, col31, col32, col33, col34, col35, col36, col37, col38, \
    col39, col40, col41, col42 = tf.decode_csv(value, record_defaults=record_defaults)
features = tf.stack([col1])

init_op = tf.global_variables_initializer()
local_init_op = tf.local_variables_initializer()

number_1 = []
age_1 = []
number_2 = []
tang_1 = []
for i in range(30):
    ii = rd.randint(1, 5642)
    number_1 = number_1 + [my_matrix[ii][0]]
    print(number_1)
    age_1 = age_1 + [my_matrix[ii][2]]
    number_2 = number_2 + [my_matrix[ii][8]]
import tensorflow as tf

filename_queue = tf.train.string_input_producer([
    "hdfs://127.0.0.1:39000/linear/training.csv",
    "hdfs://127.0.0.1:39000/linear/validation.csv",
])

reader = tf.TextLineReader()
key, value = reader.read(filename_queue)

x_observed, y_pred = tf.decode_csv(value, [[0.0], [0.0]])

with tf.Session() as sess:
    # Start populating the filename queue.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    try:
        for i in range(20):
            # Retrieve and print a single instance:
            example, label = sess.run([x_observed, y_pred])
            print(example, label)
    except tf.errors.OutOfRangeError:
        print("Done!")
    finally:
        coord.request_stop()
        coord.join(threads)
def decode_csv(row):
    cols = tf.decode_csv(row, record_defaults=ORDERED_TRAINING_DEFAULTS)
    features = dict(zip(ORDERED_TRAINING_COLUMNS, cols))
    return features
def main(): file_queue = tf.train.string_input_producer(['/data/kaggle/train.csv'], shuffle=False) reader = tf.TextLineReader() key, value = reader.read(file_queue) value = tf.decode_csv(value, [[1.]] * 66) gender, age, TotalGV, Intracranial_volume = value[:4] DKT = tf.cast(value[4:], tf.float32) label = tf.one_hot(tf.cast(gender - 1, tf.int32), 2) x_batch, label_batch = tf.train.shuffle_batch([DKT, label], 121, 484, 0, 1) batch_queue = tf.contrib.slim.prefetch_queue.prefetch_queue( [x_batch, label_batch], capacity=4) dequeue_op = batch_queue.dequeue() dropout_rate = tf.placeholder(tf.float32) temp = tf.layers.dense(x_batch, 1024, activation=tf.nn.sigmoid) temp = tf.layers.dense(temp, 2048, activation=tf.nn.sigmoid) temp = tf.layers.dropout(temp, dropout_rate) y = tf.layers.dense(temp, 2) loss = tf.losses.softmax_cross_entropy(onehot_labels=label_batch, logits=y) train = tf.train.AdamOptimizer(0.001).minimize(loss) accuracy = tf.metrics.accuracy(labels=tf.argmax(label_batch, 1), predictions=tf.argmax(y, 1))[1] config = tf.ConfigProto(log_device_placement=True) config.gpu_options.per_process_gpu_memory_fraction = 1.0 config.gpu_options.allow_growth = True sess = tf.Session(config=config) saver = tf.train.Saver() init = (tf.global_variables_initializer(), tf.local_variables_initializer()) tf.summary.scalar('loss', loss) tf.summary.scalar('accuracy', accuracy) merged = tf.summary.merge_all() logdir_train = "tensorboard_train/" + datetime.datetime.now().strftime( "%Y%m%d-%H%M%S") + "/" logdir_test = "tensorboard_test/" + datetime.datetime.now().strftime( "%Y%m%d-%H%M%S") + "/" writer_train = tf.summary.FileWriter(logdir_train, sess.graph) writer_test = tf.summary.FileWriter(logdir_test, sess.graph) sess.run(init) saver.restore(sess, log) coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess, coord) global_step = 0 for i in range(10000): for j in range(3): _, sumery, loss_train = sess.run([train, merged, loss], {dropout_rate: 0.5}) print('step {}: loss = {}'.format(global_step, loss_train)) writer_train.add_summary(sumery, global_step) global_step += 1 sumery = sess.run(merged, {dropout_rate: 1}) writer_test.add_summary(sumery, global_step) saver.save(sess, log) coord.request_stop() coord.join(threads) print('complete')
def _parse_infer_csv(line):
    cols_types = [['']] * 2
    columns = tf.decode_csv(line, record_defaults=cols_types, field_delim='\t')
    return columns
def raw_training_input_fn(): """Training input function that reads raw data and applies transforms.""" if isinstance(raw_data_file_pattern, six.string_types): filepath_list = [raw_data_file_pattern] else: filepath_list = raw_data_file_pattern files = [] for path in filepath_list: files.extend(file_io.get_matching_files(path)) filename_queue = tf.train.string_input_producer( files, num_epochs=num_epochs, shuffle=randomize_input) csv_id, csv_lines = tf.TextLineReader().read_up_to( filename_queue, training_batch_size) queue_capacity = (reader_num_threads + 3) * training_batch_size + min_after_dequeue if randomize_input: _, batch_csv_lines = tf.train.shuffle_batch( tensors=[csv_id, csv_lines], batch_size=training_batch_size, capacity=queue_capacity, min_after_dequeue=min_after_dequeue, enqueue_many=True, num_threads=reader_num_threads, allow_smaller_final_batch=allow_smaller_final_batch) else: _, batch_csv_lines = tf.train.batch( tensors=[csv_id, csv_lines], batch_size=training_batch_size, capacity=queue_capacity, enqueue_many=True, num_threads=reader_num_threads, allow_smaller_final_batch=allow_smaller_final_batch) csv_header, record_defaults = csv_header_and_defaults(features, schema, stats, keep_target=True) parsed_tensors = tf.decode_csv(batch_csv_lines, record_defaults, name='csv_to_tensors') raw_features = dict(zip(csv_header, parsed_tensors)) transform_fn = make_preprocessing_fn(analysis_output_dir, features, keep_target=True) transformed_tensors = transform_fn(raw_features) # Expand the dims of non-sparse tensors. This is needed by tf.learn. transformed_features = {} for k, v in six.iteritems(transformed_tensors): if isinstance(v, tf.Tensor) and v.get_shape().ndims == 1: transformed_features[k] = tf.expand_dims(v, -1) else: transformed_features[k] = v # image_feature_engineering does not need to be called as images are not # supported in raw csv for training. # Remove the target tensor, and return it directly target_name = get_target_name(features) if not target_name or target_name not in transformed_features: raise ValueError('Cannot find target transform in features') transformed_target = transformed_features.pop(target_name) return transformed_features, transformed_target
filenames = ['data/text%d.txt' % i for i in range(1, 4)]
filename_queue = tf.train.string_input_producer(filenames, capacity=3, shuffle=True,
                                                name='string_input_producer')

for f in filenames:
    if not tf.gfile.Exists(f):
        raise ValueError('Failed to find file: ' + f)
    else:
        print('File %s found.' % f)

reader = tf.TextLineReader()
key, value = reader.read(filename_queue, name='text_read_op')

record_defaults = [[-1.0], [-1.0], [-1.0], [-1.0], [-1.0],
                   [-1.0], [-1.0], [-1.0], [-1.0], [-1.0]]
col1, col2, col3, col4, col5, col6, col7, col8, col9, col10 = tf.decode_csv(
    value, record_defaults=record_defaults)
features = tf.stack([col1, col2, col3, col4, col5, col6, col7, col8, col9, col10])

x = tf.train.shuffle_batch([features], batch_size=3, capacity=5, name='data_batch',
                           min_after_dequeue=1, num_threads=1)

coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord, sess=session)

W = tf.Variable(tf.random_uniform(shape=[10, 5], minval=-0.1, maxval=0.1,
                                  dtype=tf.float32), name='W')
b = tf.Variable(tf.zeros(shape=[5], dtype=tf.float32), name='b')
h = tf.nn.sigmoid(tf.matmul(x, W) + b)

tf.global_variables_initializer().run()
print("test")
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf
from sklearn import metrics

import define_net_resp

hidden1 = 10
hidden2 = 10
hidden3 = 10
learning_rate = 0.1
max_steps = 6000

file_tt = str(sys.argv[1])
l_ex = file_len(file_tt)

filename_tt = tf.train.string_input_producer([file_tt], shuffle=False)
reader_tt = tf.TextLineReader(skip_header_lines=0)
_, csv_row_tt = reader_tt.read(filename_tt)
col1t, col2t, col3t, col4t, col5t, col6t, col7t, col8t, col9t, col10t, col11t, col12t, col13t, col14t = tf.decode_csv(
    csv_row_tt, record_defaults=record_defaults)
features_tt = tf.pack([col1t, col2t, col3t, col4t, col5t, col6t, col7t, col8t,
                       col9t, col10t, col11t, col12t, col13t])

min_after_dequeue = 10000000
capacity = min_after_dequeue + define_net_resp.FEATURES * batch_size
images_batch, label_batch = tf.train.batch([features_tt, col14t - 1], batch_size=batch_size,
                                           capacity=capacity, num_threads=1)


def main(_):
    run_training()


if __name__ == '__main__':
    tf.app.run()
# REF [site] >> https://www.tensorflow.org/programmers_guide/reading_data

import tensorflow as tf

#--------------------------------------------------------------------
# CSV file.

filename_queue = tf.train.string_input_producer(["file0.csv", "file1.csv"])

reader = tf.TextLineReader()
key, value = reader.read(filename_queue)

# Default values, in case of empty columns. Also specifies the type of the decoded result.
record_defaults = [[1], [1], [1], [1], [1]]
col1, col2, col3, col4, col5 = tf.decode_csv(value, record_defaults=record_defaults)
features = tf.stack([col1, col2, col3, col4])

with tf.Session() as sess:
    # Start populating the filename queue.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    for i in range(1200):
        # Retrieve a single instance:
        example, label = sess.run([features, col5])

    coord.request_stop()
    coord.join(threads)
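# For comparison, a rough tf.data equivalent of the queue-based pipeline above
# (available from TF 1.4 onward). File names and column count mirror the example;
# this sketch is not part of the referenced guide snippet.
dataset = tf.data.TextLineDataset(["file0.csv", "file1.csv"])

def _parse_line(line):
    col1, col2, col3, col4, col5 = tf.decode_csv(line, record_defaults=[[1]] * 5)
    return tf.stack([col1, col2, col3, col4]), col5

dataset = dataset.map(_parse_line).batch(32)
features_batch, label_batch = dataset.make_one_shot_iterator().get_next()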
def decode_csv(csv_row):
    filename, label = tf.decode_csv(records=csv_row, record_defaults=[[''], ['']])
    image_bytes = tf.read_file(filename=filename)
    return image_bytes, label
def get_batch(param_dict=params.default_param_dict(), shuffled=True): """ uses data_batch_size, data_dir, data_mode, data_serialized """ batch_size = param_dict["data_batch_size"] all_files = sorted(os.listdir(param_dict["data_dir"])) nthreads = 1 # get data characteristics data_char_file = os.path.join(param_dict["data_dir"], "info.yml") with open(data_char_file) as infile: data_dict = yaml.safe_load(infile) print("### start load_data") print("data class_column: " + str(data_dict['class_column'])) print("data num_metadata: " + str(data_dict['num_metadata'])) print("data num_examples: " + str(data_dict['num_examples'])) # get list of files filenames = [] filesuffix = '.data' readtype = tf.TextLineReader if param_dict["data_serialized"]: filesuffix = '.tfrecord' readtype = tf.TFRecordReader for fname in all_files: if filesuffix in fname: filenames.extend([param_dict["data_dir"] + fname]) nreaders = min(nthreads, len(filenames)) print("data number of datafiles: " + str(len(filenames))) print("data example datafile: " + filenames[0]) print("data batch_size: " + str(batch_size)) print("data nreaders: " + str(nreaders)) # read in example if shuffled: # Reads multiple records in parallel from data_sources using n readers. key, example = slim.parallel_reader.parallel_read( filenames, readtype, num_epochs=None, num_readers=nreaders, shuffle=True, dtypes=None, capacity=32 * batch_size, min_after_dequeue=16 * batch_size, seed=19850411, scope=None) else: # Reads sequentially the data_sources using the reader, doing a single pass. filename_queue = tf.train.string_input_producer(filenames, shuffle=False) reader = readtype() key, example = reader.read(filename_queue) #print("data raw example size: " + str(example)) # decode example into features, label and metadata if param_dict["data_serialized"]: parse_example = tf.parse_single_example( example, features={ 'feature_values': tf.FixedLenFeature([data_dict['class_column'] - 1], tf.float32), 'label': tf.FixedLenFeature([1], tf.int64), 'meta_values': tf.FixedLenFeature([data_dict['num_metadata']], tf.string) }) features = tf.cast(parse_example['feature_values'], tf.float32) label = tf.cast(parse_example['label'], tf.int32) metadata = tf.cast(parse_example['meta_values'], tf.string) else: record_defaults = [[1.0] for dim in range(data_dict['class_column'] - 1)] record_defaults.extend([[1]]) record_defaults.extend([['str'] for dim in range(data_dict['num_metadata'])]) print("data record_defaults length: " + str(len(record_defaults))) reader = tf.decode_csv(records=example, record_defaults=record_defaults, field_delim="\t") #print("data size of reader: " + str(reader)) #tf.decode_csv() from slim.parallel_reader.parallel_read() returns tensors #with <unknown> shape. 
#This shape needs to be casted to () to be used with tf.train.batch() reshaped_reader = [] for tensor in reader: reshaped_reader.append(tf.reshape(tensor, [])) #print("data size of reshaped_reader: " + str(reshaped_reader)) features = reshaped_reader[0:data_dict['class_column'] - 1] label = reshaped_reader[data_dict['class_column'] - 1:data_dict['class_column']] metadata = reshaped_reader[data_dict['class_column']:( data_dict['class_column'] + data_dict['num_metadata'])] #print("data size of features: " + str(features)) #print("data size of label: " + str(label)) #print("data size of metadata: " + str(metadata)) # reformat example features label = tf.squeeze(label) input_size = data_dict['class_column'] - 1 if param_dict['data_mode'] == 'diff': input_size = int((input_size) / 2) features = (tf.slice(features, [input_size], [input_size]) - tf.slice(features, [0], [input_size])) elif param_dict['data_mode'] == 'exp_only': input_size = int((input_size) / 2) features = tf.slice(features, [input_size], [input_size]) #features.set_shape([input_size]) param_dict['data_input_size'] = input_size print("data orig input_size: " + str(data_dict['class_column'] - 1)) print("data final input_size: " + str(input_size)) # create batch if shuffled: feat_b, label_b, meta_b = tf.train.shuffle_batch( [features, label, metadata], batch_size=batch_size, num_threads=nthreads, capacity=32 * batch_size, min_after_dequeue=16 * batch_size, seed=19850411, allow_smaller_final_batch=True) else: feat_b, label_b, meta_b = tf.train.batch( [features, label, metadata], batch_size=batch_size, num_threads=1, capacity=batch_size, allow_smaller_final_batch=True) return feat_b, label_b, meta_b, input_size, data_dict[ 'num_metadata'], data_dict['num_examples']
def parse_row(row):
    fields = tf.decode_csv(records=row, record_defaults=CSV_DEFAULTS)
    features = dict(zip(CSV_COLUMN_NAMES, fields))
    label = features.pop("fare_amount")
    return features, label
def parser_csv(line):
    parsed_line = tf.decode_csv(line, [['string'], ['tf.int64'], ['tf.int64']])
    label = parsed_line[-1]
    augment = parsed_line[1]
    return parsed_line[0], augment, label
def load_train_batch(self): """Load a batch of training instances. """ opt = self.opt # Load the list of training files into queues #TODO if opt.train_lite: file_list = self.format_file_list(opt.dataset_dir, opt.filelist_dir, 'train_lite') else: file_list = self.format_file_list(opt.dataset_dir, opt.filelist_dir, 'train') image_paths_queue = tf.train.string_input_producer( file_list['image_file_list'], shuffle=False) cam_paths_queue = tf.train.string_input_producer( file_list['cam_file_list'], shuffle=False) # Load camera intrinsics cam_reader = tf.TextLineReader() _, raw_cam_contents = cam_reader.read(cam_paths_queue) rec_def = [] for i in range(9): rec_def.append([1.]) raw_cam_vec = tf.decode_csv(raw_cam_contents, record_defaults=rec_def) raw_cam_vec = tf.stack(raw_cam_vec) intrinsics = tf.reshape(raw_cam_vec, [3, 3]) # Load images img_reader = tf.WholeFileReader() _, image_contents = img_reader.read(image_paths_queue) image_seq = tf.image.decode_jpeg(image_contents) tgt_image, src_image_stack = \ self.unpack_image_sequence( image_seq, opt.img_height, opt.img_width, opt.num_source) #TODO Load Semantics # See cityscape label defs in https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/helpers/labels.py#L62 # Also notice that deeplabv3+ uses `train_id` https://github.com/tensorflow/models/blob/69b016449ffc797421bf003d8b7fd8545db866d7/research/deeplab/datasets/build_cityscapes_data.py#L46 # Color maps are in https://github.com/tensorflow/models/blob/69b016449ffc797421bf003d8b7fd8545db866d7/research/deeplab/utils/get_dataset_colormap.py#L207 if opt.sem_assist: sem_paths_queue = tf.train.string_input_producer( file_list['sem_image_file_list'], shuffle=False) sem_reader = tf.WholeFileReader() sem_keys, sem_contents = sem_reader.read(sem_paths_queue) if opt.load_from_raw: sem_seq = tf.reshape( tf.decode_raw(sem_contents, tf.uint8), [1, opt.img_height, (opt.num_source + 1) * opt.img_width]) else: sem_seq = tf.py_func(read_npy_file, [sem_keys], [ tf.uint8, ]) #TODO Load Instances: we use COCO # Two channels: class and id level. For id level we only use the edge if opt.ins_assist: ins_paths_queue = tf.train.string_input_producer( file_list['ins_image_file_list'], shuffle=False) ins_reader = tf.WholeFileReader() ins_keys, ins_contents = ins_reader.read(ins_paths_queue) if opt.load_from_raw: ins_seq = tf.reshape(tf.decode_raw(ins_contents, tf.uint8), [ 1, opt.img_height, (opt.num_source + 1) * opt.img_width, 2 ]) else: ins_seq = tf.py_func(read_npy_file, [ins_keys], [ tf.uint8, ]) #TODO 1. 
SHUFFLE BATCH # Form training batches seed = random.randint(0, 2**31 - 1) min_after_dequeue = 2048 capacity = min_after_dequeue + opt.num_threads * opt.batch_size if opt.sem_assist and opt.ins_assist: src_image_stack, tgt_image, intrinsics, sem_seq, ins_seq = tf.train.shuffle_batch( [ src_image_stack, tgt_image, intrinsics, sem_seq[0], ins_seq[0] ], opt.batch_size, capacity, min_after_dequeue, opt.num_threads, seed) elif opt.sem_assist: src_image_stack, tgt_image, intrinsics, sem_seq = tf.train.shuffle_batch( [src_image_stack, tgt_image, intrinsics, sem_seq[0]], opt.batch_size, capacity, min_after_dequeue, opt.num_threads, seed) elif opt.ins_assist: src_image_stack, tgt_image, intrinsics, ins_seq = tf.train.shuffle_batch( [src_image_stack, tgt_image, intrinsics, ins_seq[0]], opt.batch_size, capacity, min_after_dequeue, opt.num_threads, seed) else: src_image_stack, tgt_image, intrinsics = tf.train.shuffle_batch( [src_image_stack, tgt_image, intrinsics], opt.batch_size, capacity, min_after_dequeue, opt.num_threads, seed) # semantic segmentation tgt_sem = None tgt_sem_map = None tgt_sem_mask = None tgt_sem_edge = None src_sem_stack = None src_sem_map_stack = None src_sem_mask_stack = None src_sem_edge_stack = None # ins0 ~ instance level, but still class segmentation tgt_ins0 = None tgt_ins0_map = None tgt_ins0_edge = None src_ins0_stack = None src_ins0_map_stack = None src_ins0_edge_stack = None # ins1 ~ instance level, but this is id segmentation tgt_ins1_edge = None src_ins1_edge_stack = None #TODO 2. TRAMSFORMATION AND UNPACKING if opt.sem_assist: #TODO get one-hot encoded sem_oh_seq (4,128,1248,19)X{0,1} sem_oh_seq = tf.cast( tf.one_hot(sem_seq, on_value=1, depth=opt.sem_num_class), tf.uint8) #TODO decouple tgt_sem (4,128,1248,19)X{0,1} src_sem_stack (4,128,1248,2*19)X{0,1} tgt_sem, src_sem_stack = self.unpack_sem_sequence_batch_atom( sem_oh_seq, opt.sem_num_class) #TODO get densemap sem_map_seq (4,128,1248,1)X{0,1,...,18} sem_map_seq = tf.expand_dims(sem_seq, -1) #TODO decouple tgt_sem_map (4,128,1248,1)X{0,1,...,18} src_sem_map_stack (4,128,1248,2*1)X{0,1,...,18} tgt_sem_map, src_sem_map_stack = self.unpack_sem_sequence_batch_atom( sem_map_seq, 1) if opt.sem_mask_explore: #TODO get sem mask sem_mask_seq (4,128,1248,c) here we assume c=1 sem_mask_seq = self.get_sem_mask_batch(sem_seq) #TODO decouple tgt_sem_mask (4,128,1248,c) src_sem_mask_stack (4,128,1248,2*c) tgt_sem_mask, src_sem_mask_stack = self.unpack_sem_sequence_batch_atom( sem_mask_seq, 1) if opt.sem_edge_explore: #TODO get sem edge sem_edge_seq (4,128,1248,c) here we assume c=1 sem_edge_seq = self.get_sem_edge_batch(sem_seq) #TODO decouple tgt_sem_edge (4,128,1248,c) src_sem_edge_stack (4,128,1248,2*c) tgt_sem_edge, src_sem_edge_stack = self.unpack_sem_sequence_batch_atom( sem_edge_seq, 1) if opt.ins_assist: ins0_seq = ins_seq[:, :, :, 0] ins1_seq = ins_seq[:, :, :, 1] #TODO get one-hot ins0_oh_seq (4,128,1248,81)X{0,1} ins0_oh_seq = tf.cast( tf.one_hot(ins0_seq, on_value=1, depth=opt.ins_num_class), tf.uint8) #ins1_oh_seq = tf.cast(tf.one_hot(ins1_seq, on_value=1, depth = 255), tf.uint8) #TODO decouple tgt_ins0 (4,128,1248,81)X{0,1} src_ins0_stack (4,128,1248,2*81)X{0,1} tgt_ins0, src_ins0_stack = self.unpack_sem_sequence_batch_atom( ins0_oh_seq, opt.ins_num_class) #tgt_ins1, src_ins1_stack = self.unpack_sem_sequence_batch_atom(ins1_oh_seq, opt.ins_num_class) #TODO get densemap sem_ins0_seq (4,128,1248,1)X{0,1,...,80} ins0_map_seq = ins_seq[:, :, :, :1] ins1_map_seq = ins_seq[:, :, :, 1:] #TODO decouple tgt_ins0_map 
(4,128,1248,1)X{0,1,...,80} src_ins0_map_stack (4,128,1248,2*1)X{0,1,...,80} tgt_ins0_map, src_ins0_map_stack = self.unpack_sem_sequence_batch_atom( ins0_map_seq, 1) tgt_ins1_map, src_ins1_map_stack = self.unpack_sem_sequence_batch_atom( ins1_map_seq, 1) if opt.ins0_edge_explore: #TODO get edge ins0_edge_seq (4,128,1248,c) here we assume c=1 ins0_edge_seq = self.get_sem_edge_batch(ins0_seq) #TODO decouple tgt_ins0_edge (4,128,1248,c) src_ins0_edge_stack (4,128,1248,2*c) tgt_ins0_edge, src_ins0_edge_stack = self.unpack_sem_sequence_batch_atom( ins0_edge_seq, 1) if opt.ins1_edge_explore: #TODO get edge ins1_edge_seq (4,128,1248,c) here we assume c=1 ins1_edge_seq = self.get_sem_edge_batch(ins1_seq) #TODO decouple tgt_ins1_edge (4,128,1248,c) src_ins1_edge_stack (4,128,1248,2*c) tgt_ins1_edge, src_ins1_edge_stack = self.unpack_sem_sequence_batch_atom( ins1_edge_seq, 1) #TODO 3. DATA AUGMENTATION image_all = tf.concat([tgt_image, src_image_stack], axis=3) image_all, intrinsics, aug_params = self.data_augmentation( image_all, intrinsics, opt.img_height, opt.img_width) #TODO changed API if opt.sem_assist: ##TODO Do the same data augmentation for semantic segmentations tgt_sem, src_sem_stack = self.data_aug(tgt_sem, src_sem_stack, aug_params, "bilinear") tgt_sem_map, src_sem_map_stack = self.data_aug( tgt_sem_map, src_sem_map_stack, aug_params, "neighbor") if self.opt.sem_mask_explore: tgt_sem_mask, src_sem_mask_stack = \ self.data_aug(tgt_sem_mask, src_sem_mask_stack, aug_params, "bilinear") if self.opt.sem_edge_explore: tgt_sem_edge, src_sem_edge_stack = \ self.data_aug(tgt_sem_edge, src_sem_edge_stack, aug_params, "bilinear") #TODO maybe transfer needs this settings self.data_aug(tgt_sem_edge, src_sem_edge_stack, aug_params, "neighbor") if opt.ins_assist: ##TODO Do the same data augmentation for instance segmentations tgt_ins0, src_ins0_stack = self.data_aug(tgt_ins0, src_ins0_stack, aug_params, "bilinear") #tgt_ins1, src_ins1_stack = self.data_aug(tgt_ins1, src_ins1_stack, aug_params, "bilinear") tgt_ins0_map, src_ins0_map_stack = self.data_aug( tgt_ins0_map, src_ins0_map_stack, aug_params, "neighbor") #tgt_ins1_map, src_ins1_map_stack = self.data_aug(tgt_ins1_map, src_ins1_map_stack, aug_params, "neighbor") if self.opt.ins0_edge_explore: tgt_ins0_edge, src_ins0_edge_stack = \ self.data_aug(tgt_ins0_edge, src_ins0_edge_stack, aug_params, "bilinear") #TODO maybe transfer needs this settings self.data_aug(tgt_ins0_edge, src_ins0_edge_stack, aug_params, "neighbor") if self.opt.ins1_edge_explore: tgt_ins1_edge, src_ins1_edge_stack = \ self.data_aug(tgt_ins1_edge, src_ins1_edge_stack, aug_params, "bilinear") #TODO maybe transfer needs this settings self.data_aug(tgt_ins1_edge, src_ins1_edge_stack, aug_params, "neighbor") # 4. RETURN # image_channels=3*opt.seq_length tgt_image = image_all[:, :, :, :3] src_image_stack = image_all[:, :, :, 3:] #3:image_channels] intrinsics = self.get_multi_scale_intrinsics(intrinsics, opt.num_scales) # if opt.sem_assist and opt.ins_assist: return tgt_image, src_image_stack, intrinsics, \ [tgt_sem, tgt_sem_map, tgt_sem_mask, tgt_sem_edge], \ [src_sem_stack, src_sem_map_stack, src_sem_mask_stack, src_sem_edge_stack], \ [tgt_ins0, tgt_ins0_map, tgt_ins0_edge, tgt_ins1_edge], \ [src_ins0_stack, src_ins0_map_stack, src_ins0_edge_stack, src_ins1_edge_stack]
def decode_csv(row):
    # row is a string tensor containing the contents of one row
    features = tf.decode_csv(row, record_defaults=DEFAULTS)  # string tensor -> list of 50 rank 0 float tensors
    label = features.pop()         # remove last feature and use as label
    features = tf.stack(features)  # list of rank 0 tensors -> single rank 1 tensor
    return {TIMESERIES_COL: features}, label
def train_model(path, in_seq_size, out_seq_size, units, layers, trainiterations,
                batch_size, dout, dictionary, restore=False):
    # Placeholder variables to be fed into the model at run time
    x = tf.placeholder(tf.int32, shape=[None, None])
    y = tf.placeholder(tf.int32, shape=[None, None])
    targets = tf.placeholder(tf.int32, shape=[None, None])
    keep = tf.placeholder(tf.float32)
    # Dictionary reversed to look up words from their integer values
    rvsdictionary = dict(izip(dictionary.values(), dictionary.keys()))
    # Number of words in the dictionary
    dictsize = len(dictionary)
    # Files that the model is trained on
    filename_queue = tf.train.string_input_producer([str(path) + "dialogs.csv"])
    # Reads the files in the filename_queue
    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)
    # Decodes the CSV to read the sentence pair strings for the training data
    record_defaults = [[""], [""]]
    col1, col2 = tf.decode_csv(value, record_defaults=record_defaults, field_delim=",")
    # Constructs 2 tensors for the features (input sentences) and labels (correct outputs)
    features = tf.pack(col1)
    labels = tf.pack(col2)
    # Shuffles the inputs
    features, labels = tf.train.shuffle_batch([features, labels], batch_size,
                                              capacity=10000, min_after_dequeue=5000,
                                              num_threads=4)
    # Lists to hold the inputs and the correct outputs
    teminp = []
    temoutput = []
    temtarget = []
    # Makes the list of encoder inputs for the rnn
    for o in range(in_seq_size):
        teminp.append(x[:, o])
    # Makes the lists of decoder inputs and targets for the rnn
    for o in range(out_seq_size):
        temoutput.append(y[:, o])
        temtarget.append(targets[:, o])
    # Makes the temporary weights used to train the model
    W1 = tf.placeholder(tf.float32, shape=[batch_size, out_seq_size])
    W1_0 = []
    for j in range(out_seq_size):
        W1_0.append(W1[:, j])
    # Makes the rnn cell (Gated Recurrent Unit, an rnn cell alternative)
    cell1 = tf.nn.rnn_cell.GRUCell(units)
    # Adds dropout to the layers to make the model more robust
    drop = tf.nn.rnn_cell.DropoutWrapper(cell1, input_keep_prob=keep, output_keep_prob=keep)
    # Makes multiple layers of the model
    cell = tf.nn.rnn_cell.MultiRNNCell([drop] * layers)
    # Number of samples for sampled softmax
    num_samples = 512
    # Makes the output projection layer by creating the weight (w) and bias (b) variables
    w = tf.get_variable("proj_w", [units, dictsize])
    w_t = tf.transpose(w)
    with tf.device("/cpu:0"):
        b = tf.get_variable("proj_b", [dictsize])
    # Output projection to turn the rnn outputs into word outputs
    output_projection = (w, b)

    # Sampled loss function used to train the model
    def sampled_loss(inputs, labels):
        labels = tf.reshape(labels, [-1, 1])
        return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels, num_samples, dictsize)

    # Declares the softmax loss function for training
    softmax_loss_function = sampled_loss
    # Seq2Seq model
    rnn, state = seq2seq_gpu.embedding_attention_seq2seq(
        teminp, temoutput, cell, dictsize, dictsize, 1000,
        output_projection=output_projection, feed_previous=False)
    rnnoutputs = [tf.matmul(word, w) + b for word in rnn]
    # Loss function to train the model
    logits = tf.nn.seq2seq.sequence_loss(rnn, temtarget, W1_0,
                                         softmax_loss_function=softmax_loss_function)
    tf.scalar_summary("Loss", logits)
    # Optimizer to change the weights in the model
    train = tf.train.AdagradOptimizer(0.1).minimize(logits)
    # Saves the model after training
    saver = tf.train.Saver()
    # GPU config options to control memory usage
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # Initializes all the variables and creates all the Tensorflow objects above
    init_op = tf.initialize_all_variables()
    sess = tf.InteractiveSession(config=config)
    sess.run(init_op)
    # Collects summary data so training can be easily viewed
    merged = tf.merge_all_summaries()
    writer = tf.train.SummaryWriter(path + "graph", sess.graph)
    # Starts the threads for training
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # If true, restores the model from a previous version
    if restore:
        print("Restoring Model")
        saver.restore(sess, str(path) + "model.ckpt")
    # Training loop
    for i in range(trainiterations):
        # Gets the time to visualize training times
        cutime = time.time() * 1000
        # Gets the data from the dialogs file
        data, outputs = sess.run([features, labels])
        # Lists of the data to be trained
        databatch = []
        labelsbatch = []
        correctoutputsbatch = []
        # Loop to turn each word in the input sentences into integer arrays
        for line in data:
            # Splits the line by word
            words = re.split(r"\s", line)
            # Creates a list of the word integers
            tempdata = []
            # Fills the tempdata list with the word integers
            for word in words:
                if dictionary.get(word) is not None:
                    tempdata.append(dictionary[word])
                else:
                    tempdata.append(dictionary["UKN"])
            # Fills the rest of the empty spaces with null values
            for p in range(in_seq_size - len(tempdata)):
                tempdata.append(dictionary["NULL"])
            # Reverses the input integers; this has been shown to improve the model
            tempdata.reverse()
            # Adds this sentence to the batch
            databatch.append(tempdata)
        # Loop to turn each word in the output sentences into integer arrays
        for line in outputs:
            # Splits the line by word
            outwords = re.split(r"\s", line)
            # Creates a list of the word integers, starting with the GO token
            outwords.insert(0, "GO")
            tempoutputs = []
            # Fills the tempoutputs list with the word integers
            for word in outwords:
                if dictionary.get(word) is not None:
                    tempoutputs.append(dictionary[word])
            # Fills the rest of the empty spaces with null values
            for p in range(out_seq_size - len(tempoutputs)):
                tempoutputs.append(dictionary["NULL"])
            # Makes the correct outputs to train the model on
            correctoutputs = [tempoutputs[k + 1] for k in range(len(tempoutputs) - 1)]
            correctoutputs = np.append(np.array(correctoutputs), dictionary["NULL"])
            # Adds the lists to the batches for training
            labelsbatch.append(tempoutputs)
            correctoutputsbatch.append(correctoutputs)
        # Makes the batches into arrays to be used by Tensorflow
        databatch = np.array(databatch)
        labelsbatch = np.array(labelsbatch)
        correctoutputsbatch = np.array(correctoutputsbatch)
        # Training step to change the weights of the model
        summary, _ = sess.run([merged, train],
                              feed_dict={x: databatch, y: labelsbatch,
                                         targets: correctoutputsbatch,
                                         W1: np.ones([batch_size, out_seq_size], dtype=np.float32),
                                         keep: 0.5})
        # print(sess.run(tf.get_default_graph().get_tensor_by_name("embedding_attention_seq2seq/embedding_attention_decoder/embedding:0")))
        # Writes summary data to a file to be viewed
        writer.add_summary(summary, global_step=i)
        if dout:
            tempout = sess.run(rnnoutputs, feed_dict={x: databatch, y: labelsbatch, keep: 1.0})
            tempdata = np.split(np.array(tempout), batch_size, 1)
            data = []
            for sent in tempdata:
                temdata = []
                for word in sent:
                    temdata.append(rvsdictionary[np.argmax(word)])
                temdata = [item for item in temdata if item != 'NULL']
                data.append(temdata)
            print(data)
        print("Time: " + str((time.time() * 1000) - cutime) + " Iteration: " + str(i))
        if i % 10000 == 0 and i != 0:
            saver.save(sess, str(path) + "model.ckpt", global_step=i)
            print("Model Saved")
    saver.save(sess, str(path) + "model.ckpt")
    coord.request_stop()
    coord.join(threads)
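# Hedged usage sketch (not part of the original): a call to train_model, assuming a
# vocabulary dict that already contains the special "NULL", "UKN" and "GO" tokens,
# a <path>/dialogs.csv file, and an importable seq2seq_gpu module; all sizes below
# are illustrative only.
vocab = {"NULL": 0, "UKN": 1, "GO": 2, "hello": 3, "world": 4}
train_model(path="./data/", in_seq_size=25, out_seq_size=25, units=512, layers=3,
            trainiterations=100000, batch_size=32, dout=False, dictionary=vocab)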
# data csv files
train_csv_dir = "/mnt/hdd3t/Data/hci1/hoon/LightHouse_of_Inha/CSVs/3th/size/train_G_size.csv"
test_csv_dir = "/mnt/hdd3t/Data/hci1/hoon/LightHouse_of_Inha/CSVs/3th/size/test_G_size.csv"

image_height = 299
image_width = 299
train_batch_size = 32  # batch size
test_batch_size = 16
num_out = 3  # number of output classes

# train data load
train_queue = tf.train.string_input_producer([train_csv_dir])
train_reader = tf.TextLineReader()
_, train_csv_value = train_reader.read(train_queue)
train_img_dir, train_label, train_gender = tf.decode_csv(train_csv_value,
                                                         record_defaults=[[""], [-1], [-1]])
train_img_value = tf.read_file(train_img_dir)
train_img = tf.reshape(
    tf.cast(tf.image.decode_jpeg(train_img_value, channels=3), dtype=tf.float32),
    shape=[image_height, image_width, 3])
train_label = tf.reshape(tf.one_hot(train_label, depth=num_out, on_value=1.0, off_value=0.0),
                         shape=[num_out])
train_gender = tf.reshape(train_gender, shape=[1])

# test data load
test_queue = tf.train.string_input_producer([test_csv_dir], shuffle=False)
test_reader = tf.TextLineReader()
_, test_csv_value = test_reader.read(test_queue)
test_img_dir, test_label, test_gender = tf.decode_csv(test_csv_value,
                                                      record_defaults=[[""], [-1], [-1]])
test_img_value = tf.read_file(test_img_dir)
test_img = tf.reshape(
    tf.cast(tf.image.decode_jpeg(test_img_value, channels=3), dtype=tf.float32),
    shape=[image_height, image_width, 3])
test_label = tf.reshape(tf.one_hot(test_label, depth=num_out, on_value=1.0, off_value=0.0),
                        shape=[num_out])
test_gender = tf.reshape(test_gender, shape=[1])
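# Hedged sketch (not in the original): one way to batch the per-example tensors above
# and pull a training batch inside a session; the batch-queue capacities are assumptions.
train_img_batch, train_label_batch, train_gender_batch = tf.train.shuffle_batch(
    [train_img, train_label, train_gender], batch_size=train_batch_size,
    capacity=1000, min_after_dequeue=500)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    imgs, labels, genders = sess.run([train_img_batch, train_label_batch, train_gender_batch])
    coord.request_stop()
    coord.join(threads)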
def ex4():
    # tf.train.string_input_producer() enqueues the required files into a
    # filename queue (shuffling can be turned on or off).
    filename_queue = tf.train.string_input_producer(
        string_tensor=['test-score.csv'], shuffle=False, name='filename_queue')

    # A reader matching the data format (csv, tfrecord, TextLineReader, etc.)
    # dequeues filenames from the queue and reads their records into value.
    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)

    # Default values, in case of empty columns. Also specifies the type of the
    # decoded result.
    # Decode the values as CSV.
    # http://bcho.tistory.com/1165?category=555440
    record_defaults = [[0.], [0.], [0.], [0.]]  # (csv format)
    xy = tf.decode_csv(value, record_defaults=record_defaults)

    # Collect batches of csv rows
    train_x_batch, train_y_batch = tf.train.batch([xy[0:-1], xy[-1:]], batch_size=25)

    # placeholders for a tensor that will be always fed.
    X = tf.placeholder(tf.float32, shape=[None, 3])
    Y = tf.placeholder(tf.float32, shape=[None, 1])

    W = tf.Variable(tf.random_normal([3, 1]), name='weight')
    b = tf.Variable(tf.random_normal([1]), name='bias')

    # Hypothesis
    hypothesis = tf.matmul(X, W) + b

    # Simplified cost/loss function
    cost = tf.reduce_mean(tf.square(hypothesis - Y))

    # Minimize
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
    train = optimizer.minimize(cost)

    # Launch the graph in a session.
    sess = tf.Session()
    # Initializes global variables in the graph.
    sess.run(tf.global_variables_initializer())

    # Start populating the filename queue.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    x_batch, y_batch = sess.run([train_x_batch, train_y_batch])
    for step in range(2001):
        cost_val, hy_val, _ = sess.run([cost, hypothesis, train],
                                       feed_dict={X: x_batch, Y: y_batch})
        if step % 10 == 0:
            print(step, "Cost: ", cost_val, "\nPrediction:\n", hy_val)

    coord.request_stop()
    coord.join(threads)

    # Ask my score
    print("Your score will be ", sess.run(hypothesis, feed_dict={X: [[100, 70, 101]]}))
    print("Other scores will be ",
          sess.run(hypothesis, feed_dict={X: [[60, 70, 110], [90, 100, 80]]}))
def decode_csv(value_column):
    columns = tf.decode_csv(records=value_column, record_defaults=DEFAULTS)
    features = dict(zip(CSV_COLUMNS, columns))
    label = features.pop(LABEL_COLUMN)
    return features, label
def iris_parser(record):
    record_types = [tf.float32, tf.float32, tf.float32, tf.float32,
                    tf.int32, tf.int32, tf.int32]
    line = tf.decode_csv(record, record_types, field_delim=',')
    return line
preprocessing.resize()    # Resize images to 64 by 64 and convert them to grayscale
preprocessing.make_csv()  # Create the labeled image csv

# hyper parameters
IMAGE_WIDTH = 64
IMAGE_HEIGHT = 64
BATCH_SIZE = 125
NUM_CLASSES = 2
CHECK_POINT_DIR = TB_SUMMARY_DIR = "./tensor_board/"

# read csv
csv_file = tf.train.string_input_producer(["./label.csv"], shuffle=True)
csv_reader = tf.TextLineReader()
_, line = csv_reader.read(csv_file)
image_file, label_decoded = tf.decode_csv(line, record_defaults=[[""], [""]])
image_decoded = tf.image.decode_jpeg(tf.read_file(image_file), channels=1)
image_cast = tf.cast(image_decoded, tf.float32)
image = tf.reshape(image_cast, [IMAGE_WIDTH, IMAGE_HEIGHT, 1])  # 64 by 64, grayscale

test_batch = int(12500 / BATCH_SIZE)
test_image_list = ['./resize_test/' + file_name for file_name in os.listdir('./resize_test/')]
test_image_reader = tf.WholeFileReader()
test_image_name = tf.train.string_input_producer(test_image_list)
_, value = test_image_reader.read(test_image_name)
test_image_decode = tf.cast(tf.image.decode_jpeg(value, channels=1), tf.float32)
test_image = tf.reshape(test_image_decode, [IMAGE_WIDTH, IMAGE_HEIGHT, 1])
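# Hedged sketch (not in the original): label_decoded above is read as a string column,
# so before batching it would typically be converted to an integer and one-hot encoded.
# The exact label format in label.csv is an assumption here.
label_int = tf.string_to_number(label_decoded, out_type=tf.int32)
label_onehot = tf.one_hot(label_int, depth=NUM_CLASSES)
image_batch, label_batch = tf.train.shuffle_batch(
    [image, label_onehot], batch_size=BATCH_SIZE,
    capacity=5000, min_after_dequeue=1000)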
def parse_batch(value_column):
    columns = tf.decode_csv(value_column, record_defaults=DEFAULTS)
    features = dict(zip(CSV_COLUMNS, columns))
    label = features.pop(LABEL_COLUMN)
    return add_engineered(features), label
import tensorflow as tf

tf.set_random_seed(777)

filename_queue = tf.train.string_input_producer(['data-01-test-score.csv'],
                                                shuffle=False, name='filename_queue')
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)

record_defaults = [[0.], [0.], [0.], [0.]]
xy = tf.decode_csv(value, record_defaults=record_defaults)
train_x_batch, train_y_batch = tf.train.batch([xy[0:-1], xy[-1:]], batch_size=10)

X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])
W = tf.Variable([[0.], [0.], [0.]], name='weight')
b = tf.Variable([0.], name='bias')

hypothesis = tf.matmul(X, W) + b
cost = tf.reduce_mean(tf.square(hypothesis - Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

sess = tf.Session()
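# Hedged continuation (assumption, not in the original): the script presumably follows
# the usual queue-runner pattern from here - initialize, start the runners, feed batches,
# then shut the queue down. Step count and logging interval are illustrative only.
sess.run(tf.global_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

for step in range(2001):
    x_batch, y_batch = sess.run([train_x_batch, train_y_batch])
    cost_val, _ = sess.run([cost, train], feed_dict={X: x_batch, Y: y_batch})
    if step % 100 == 0:
        print(step, "Cost:", cost_val)

coord.request_stop()
coord.join(threads)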
''' import data '''
directory = "./*.csv"
filename = "./result/estr3108/resultbh1n1.csv"
csvfile = file(filename, 'wb')
writer = csv.writer(csvfile)

filename_queue = tf.train.string_input_producer(
    tf.train.match_filenames_once(directory), shuffle=True)
line_reader = tf.TextLineReader(skip_header_lines=1)
_, csv_row = line_reader.read(filename_queue)

record_defaults = [[0], [""]]
min = 5000
capacity = min + 5 * 100
labelnn, featuress = tf.decode_csv(csv_row, record_defaults=record_defaults)
labeln, features = tf.train.shuffle_batch([labelnn, featuress], batch_size=100,
                                          min_after_dequeue=min, capacity=capacity)

''' construct the model '''
# layer 1
y_ = tf.placeholder(tf.float32, shape=[None, types])
x = tf.placeholder(tf.float32, shape=[None, 4, length])
xd = tf.reshape(x, [-1, 4, length, 1])
W_conv1 = weight_variable(sh_w1)
b_conv1 = bias_variable([out1])
h_conv1 = tf.nn.relu(conv2d(xd, W_conv1) + b_conv1)
h_pool1 = maxpool(h_conv1)
import tensorflow

filename_queue = tensorflow.train.string_input_producer(
    ['data-01-test-score.csv'], shuffle=False, name='filename-queue')
reader = tensorflow.TextLineReader()
key, value = reader.read(filename_queue)

record_defaults = [[0.], [0.], [0.], [0.]]
xy = tensorflow.decode_csv(value, record_defaults=record_defaults)
train_x_batch, train_y_batch = tensorflow.train.batch([xy[0:-1], xy[-1:]], batch_size=10)

X = tensorflow.placeholder(tensorflow.float32, shape=[None, 3])
Y = tensorflow.placeholder(tensorflow.float32, shape=[None, 1])
W = tensorflow.Variable(tensorflow.random_normal([3, 1]), name='weight')
b = tensorflow.Variable(tensorflow.random_normal([1]), name='bias')

hypothesis = tensorflow.matmul(X, W) + b
cost = tensorflow.reduce_mean(tensorflow.square(hypothesis - Y))
optimizer = tensorflow.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

session = tensorflow.Session()
session.run(tensorflow.global_variables_initializer())
coordinator = tensorflow.train.Coordinator()