def read_csv_examples(image_dir, label_dir, batch_size=100, num_epochs=None, task_index=None, num_workers=None):
    print_log(worker_num, "num_epochs: {0}".format(num_epochs))
    # Setup queue of csv image filenames
    tf_record_pattern = os.path.join(image_dir, 'part-*')
    images = tf.gfile.Glob(tf_record_pattern)
    print_log(worker_num, "images: {0}".format(images))
    image_queue = tf.train.string_input_producer(images, shuffle=False, capacity=1000, num_epochs=num_epochs, name="image_queue")

    # Setup queue of csv label filenames
    tf_record_pattern = os.path.join(label_dir, 'part-*')
    labels = tf.gfile.Glob(tf_record_pattern)
    print_log(worker_num, "labels: {0}".format(labels))
    label_queue = tf.train.string_input_producer(labels, shuffle=False, capacity=1000, num_epochs=num_epochs, name="label_queue")

    # Setup reader for image queue
    img_reader = tf.TextLineReader(name="img_reader")
    _, img_csv = img_reader.read(image_queue)
    image_defaults = [ [1.0] for col in range(784) ]
    img = tf.pack(tf.decode_csv(img_csv, image_defaults))
    # Normalize values to [0,1]
    norm = tf.constant(255, dtype=tf.float32, shape=(784,))
    image = tf.div(img, norm)
    print_log(worker_num, "image: {0}".format(image))

    # Setup reader for label queue
    label_reader = tf.TextLineReader(name="label_reader")
    _, label_csv = label_reader.read(label_queue)
    label_defaults = [ [1.0] for col in range(10) ]
    label = tf.pack(tf.decode_csv(label_csv, label_defaults))
    print_log(worker_num, "label: {0}".format(label))

    # Return a batch of examples
    return tf.train.batch([image,label], batch_size, num_threads=args.readers, name="batch_csv")
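A minimal driving sketch for the reader above, assuming read_csv_examples and the module-level worker_num/args it references are importable; the directory paths and batch size here are placeholders, not from the original code.

import tensorflow as tf

images, labels = read_csv_examples("csv/train/images", "csv/train/labels",
                                   batch_size=100, num_epochs=1)

with tf.Session() as sess:
    # num_epochs creates a local epoch counter, so local variables need initializing too.
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while not coord.should_stop():
            batch_images, batch_labels = sess.run([images, labels])
    except tf.errors.OutOfRangeError:
        print("finished reading the CSV examples")
    finally:
        coord.request_stop()
        coord.join(threads)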
Example #2
def read_fer2013(eval_data):
    """
    Read and parse the examples from the FER2013 data file
    
    Args:
        eval_data: boolean indicating whether we are using training or evaluation data
    
    Returns:
        A single example contained in an object with fields:
            height: number of rows
            width: number of columns
            depth: number of colour channels
            key: filename and record number for the example
            label: an int32 Tensor with the label in the range 0..6
            image: a [height, width, depth] int32 Tensor with the image data
    """

    class FER2013Record(object):
        pass
    result = FER2013Record()

    # Dataset dimensions
    result.height = 48
    result.width = 48
    result.depth = 1

    # Set up the reader
    filename = tf.train.string_input_producer(["FER2013 data/fer2013/fer2013.csv"])

    # read from the data file
    # training data starts on line 2 (single header line)
    # test data starts after the training data
    skip_lines = 1
    if eval_data:
        skip_lines = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN
    reader = tf.TextLineReader(skip_header_lines=skip_lines)

    # Read a line corresponding to an example
    result.key, value = reader.read(filename)

    # Decode the line according to its formatting
    def1 = [[0],["Empty"],["Empty"]]
    result.label, image, result.testOrTrain = tf.decode_csv(value, def1)
    # The middle column corresponds to the image data of 48x48 = 2304
    # The data is space separated hence field_delim=' '
    def2 = [[0]]*(result.height*result.width)
    image = tf.decode_csv(image, def2, field_delim=' ')
    
    image = tf.reshape(image, [result.height, result.width, -1])
    result.image = tf.cast(image, tf.uint8)

    return result
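A hedged sketch of batching the fields of the record returned above, assuming the hard-coded CSV path exists and NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN is defined; the batch and queue sizes are illustrative.

import tensorflow as tf

record = read_fer2013(eval_data=False)
image = tf.cast(record.image, tf.float32)  # [48, 48, 1], uint8 -> float32

images, labels = tf.train.shuffle_batch(
    [image, record.label], batch_size=128,
    capacity=2000, min_after_dequeue=1000, num_threads=2)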
    def test_inputs(self, csv, batch_size):
        print("input csv file path: %s, batch size: %d" % (csv, batch_size))
        filename_queue = tf.train.string_input_producer([csv], shuffle=False)
        reader = tf.TextLineReader()
        _, serialized_example = reader.read(filename_queue)
        filename, label = tf.decode_csv(serialized_example, [["path"], [0]])

        label = tf.cast(label, tf.int32)
        jpg = tf.read_file(filename)
        image = tf.image.decode_jpeg(jpg, channels=3)
        image = tf.cast(image, tf.float32)
        print "original image shape:"
        print image.get_shape()

        # resize to distort
        dist = tf.image.resize_images(image, FLAGS.scale_h, FLAGS.scale_w)
        # random crop
        dist = tf.image.resize_image_with_crop_or_pad(dist, FLAGS.input_h, FLAGS.input_w)

        min_fraction_of_examples_in_queue = 0.4
        min_queue_examples = int(FLAGS.num_examples_per_epoch_for_train * min_fraction_of_examples_in_queue)
        print('filling queue with %d train images before starting to train. '
              'This will take a few minutes.' % min_queue_examples)

        return self._generate_image_and_label_batch(dist, label, min_queue_examples, batch_size, shuffle=False)
Example #4
def read(filename_queue):
  class Record(object):
    pass
  result = Record()

  reader = tf.TextLineReader()
  result.key, line = reader.read(filename_queue)

  #sess = tf.Session()
  #print(line[0].eval(session=sess), line[1].eval(session=sess))
  #sess.close()

  #print(line.get_shape())
  record_defaults = [[0] for _ in xrange(2305)]
  columns = tf.decode_csv(line, record_defaults=record_defaults)
  #print("PRINT: " , len(columns))
  x = tf.pack(columns[1:])
  cls = columns[0]
  result.height = 48
  result.width = 48
  result.label = tf.cast(cls, tf.int32)
  depth_major = tf.reshape(x, [result.height, result.width, 1])
  three_chann = tf.concat(2, [depth_major, depth_major, depth_major])
  print(three_chann.get_shape())
  result.image = three_chann
  return result
  def _input_fn():
    num_epochs = 100 if mode == tf.contrib.learn.ModeKeys.TRAIN else 1

    # could be a path to one file or a file pattern.
    input_file_names = tf.train.match_filenames_once(filename)
    filename_queue = tf.train.string_input_producer(
        input_file_names, num_epochs=num_epochs, shuffle=True)

    reader = tf.TextLineReader()
    _, value = reader.read_up_to(filename_queue, num_records=BATCH_SIZE)

    value_column = tf.expand_dims(value, -1)
    print('readcsv={}'.format(value_column))
    
    # all_data is a list of tensors
    all_data = tf.decode_csv(value_column, record_defaults=DEFAULTS)  
    inputs = all_data[:len(all_data)-N_OUTPUTS]  # first few values
    label = all_data[len(all_data)-N_OUTPUTS : ] # last few values
    
    # from list of tensors to tensor with one more dimension
    inputs = tf.concat(inputs, axis=1)
    label = tf.concat(label, axis=1)
    print('inputs={}'.format(inputs))
    
    return {TIMESERIES_COL: inputs}, label   # dict of features, label
Example #6
def multi_reader_multi_example():
    # create a FIFO queue
    filenames = ['a.csv', 'b.csv', 'c.csv']
    filename_queue = tf.train.string_input_producer(filenames, shuffle=False)

    # create reader
    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)

    record_defaults = [['null'], ['null']]
    example_list = [tf.decode_csv(value, record_defaults=record_defaults) for _ in range(2)]

    example_batch, label_batch = tf.train.batch_join(example_list, batch_size=5)

    # run graph
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        try:
            while not coord.should_stop():
                print(example_batch.eval())
        except tf.errors.OutOfRangeError:
                print('epochs completed!')
        finally:
            coord.request_stop()

        coord.join(threads)
def read_image_unlabeled(filename_queue, raw_img):
  class StatefarmRecord(object):
    pass
  result = StatefarmRecord()

  # Read a record, getting filenames from the filename_queue.  
  result.key, _ = tf.decode_csv(filename_queue.dequeue(), [[""], [""]], " ")

  # Extract raw JPG data as a string
  # raw_contents = tf.read_file(result.key)
  # raw_contents = raw_img

  # Decode raw data as a PNG. Defaults to uint8 encoding.
  # result.uint8image = tf.image.decode_png(raw_contents)
  result.uint8image = raw_img.astype('uint8')

  # TENSORFLOW BUG: image shape not statically determined, so force
  # it to have correct CIFAR100 dimensions
  # result.uint8image.set_shape((32, 32, 3))

  # Kind of hacky, but set a label so we can use the same structure
  # THIS SHOULD ALWAYS BE IGNORED DURING COMPUTATION, since we are
  # dealing with unlabeled data
  result.label = tf.cast(tf.string_to_number("0"), tf.int32)

  return result
Example #8
 def parse_csv(value):
   tf.logging.info('Parsing {}'.format(data_file))
   columns = tf.decode_csv(value, record_defaults=_CSV_COLUMN_DEFAULTS)
   features = dict(zip(_CSV_COLUMNS, columns))
   labels = features.pop('income_bracket')
   classes = tf.equal(labels, '>50K')  # binary classification
   return features, classes
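The same parse_csv can also be applied through tf.data instead of a queue; a hedged sketch, assuming data_file, _CSV_COLUMNS and _CSV_COLUMN_DEFAULTS are defined as in the snippet above, with an illustrative batch size.

import tensorflow as tf

dataset = (tf.data.TextLineDataset(data_file)
           .map(parse_csv, num_parallel_calls=4)
           .shuffle(buffer_size=10000)
           .batch(32))
features, classes = dataset.make_one_shot_iterator().get_next()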
 def decode_csv(line):
     parsed_line = tf.decode_csv(line, [[0.], [0.], [0.], [0.], [0]])
     label = parsed_line[-1:]  # Last element is the label
     del parsed_line[-1]  # Delete last element
     features = parsed_line  # Everything but last elements are the features
     d = dict(zip(feature_names, features)), label
     return d
def filequeue_to_batch_data(filename_queue, line_reader, batch_size = BATCH_SIZE):
    # The text file format should be: query image, retrieve image, query label,
    # retrieve label, triplet loss label (0/1)

    key, next_line = line_reader.read(filename_queue)
    query_image_name, retrieve_image_name, label_1, label_2, label_3 = tf.decode_csv(
        next_line, [tf.constant([], dtype=tf.string),tf.constant([], dtype=tf.string),
            tf.constant([], dtype = tf.int32), tf.constant([], dtype = tf.int32), tf.constant([], dtype = tf.int32)], field_delim=" ")
    
    # batch_query_image, batch_label = tf.train.batch(
    #         [query_image_name, label], batch_size=batch_size)

    reverse_channel = True  # for pre-trained purpose
    query_tensor = image_io.read_image(query_image_name, reverse_channel,   
            FEATURE_ROW, FEATURE_COL)

    retrieve_tensor = image_io.read_image(retrieve_image_name, reverse_channel,   
            FEATURE_ROW, FEATURE_COL)

    if SHUFFLE_DATA:
        min_after_dequeue = 100
        capacity = min_after_dequeue + 3 * batch_size
        batch_query_tensor, batch_retrieve_tensor, batch_label_1, batch_label_2, batch_label_3  = tf.train.shuffle_batch(
                [query_tensor, retrieve_tensor, label_1, label_2, label_3], batch_size = batch_size, capacity=capacity,
                min_after_dequeue=min_after_dequeue)
    else:
        batch_query_tensor,batch_retrieve_tensor, batch_label_1, batch_label_2, batch_label_3 = tf.train.batch(
                [query_tensor, retrieve_tensor, label_1, label_2, label_3], batch_size=batch_size)
    
    batch_tensor = tf.concat(0, [batch_query_tensor, batch_retrieve_tensor]) 
    batch_label = tf.concat(0, [batch_label_1, batch_label_2])
    return batch_tensor, batch_label, batch_label_3
def record_to_labeled_log_mel_examples(csv_record, clip_dir=None, hparams=None,
                                       label_class_index_table=None, num_classes=None):
  """Creates a batch of log mel spectrum examples from a training record.

  Args:
    csv_record: a line from the train.csv file downloaded from Kaggle.
    clip_dir: path to a directory containing clips referenced by csv_record.
    hparams: tf.contrib.training.HParams object containing model hyperparameters.
    label_class_index_table: a lookup table that represents the class map.
    num_classes: number of classes in the class map.

  Returns:
    features: Tensor containing a batch of log mel spectrum examples.
    labels: Tensor containing corresponding labels in 1-hot format.
  """
  [clip, label, _] = tf.decode_csv(csv_record, record_defaults=[[''],[''],[0]])

  features = clip_to_log_mel_examples(clip, clip_dir=clip_dir, hparams=hparams)

  class_index = label_class_index_table.lookup(label)
  label_onehot = tf.one_hot(class_index, num_classes)
  num_examples = tf.shape(features)[0]
  labels = tf.tile([label_onehot], [num_examples, 1])

  return features, labels
Example #12
 def parse_csv(line):
     print("Parsing", data_file)
      # tf.decode_csv converts each CSV line into a list of Tensors, one per column;
      # record_defaults specifies the fill value to use for each missing field.
     columns = tf.decode_csv(line, record_defaults=_CSV_COLUMN_DEFAULTS)
     features = dict(zip(_CSV_COLUMNS, columns))
     labels = features.pop('income_bracket')
     return features, tf.equal(labels, '>50K')  # tf.equal(x, y) returns a bool Tensor that is x == y element-wise
Example #13
def read_pascifar(pascifar_path, queue):
    """ Reads and parses files from the queue.
    Args:
        pascifar_path: a constant string tensor representing the path of the PASCIFAR dataset
        queue: A queue of strings in the format: file, label

    Returns:
        image_path: a tf.string tensor. The absolute path of the image in the dataset
        label: a int64 tensor with the label
    """

    # Reader for text lines
    reader = tf.TextLineReader(skip_header_lines=1)

    # read a record from the queue
    _, row = reader.read(queue)

    # file,width,height,label
    record_defaults = [[""], [0]]

    image_path, label = tf.decode_csv(row, record_defaults, field_delim=",")

    image_path = pascifar_path + tf.constant("/") + image_path
    label = tf.cast(label, tf.int64)
    return image_path, label
def filequeue_to_batch_data(filename_queue, line_reader, batch_size = BATCH_SIZE):
    
    key, next_line = line_reader.read(filename_queue)
    query_image_name, label = tf.decode_csv(
        next_line, [tf.constant([], dtype=tf.string),
            tf.constant([], dtype = tf.int32)], field_delim=" ")
    
    # batch_query_image, batch_label = tf.train.batch(
    #         [query_image_name, label], batch_size=batch_size)

    reverse_channel = True  # for pre-trained purpose
    query_tensor = image_io.read_image(query_image_name, reverse_channel,   
            FEATURE_ROW, FEATURE_COL)

    if SHUFFLE_DATA:
        min_after_dequeue = 100
        capacity = min_after_dequeue + 3 * batch_size
        batch_query_image, batch_label = tf.train.shuffle_batch(
                [query_tensor, label], batch_size = batch_size, capacity=capacity,
                min_after_dequeue=min_after_dequeue)
    else:
        batch_query_image, batch_label = tf.train.batch(
                [query_tensor, label], batch_size=batch_size)
    
    
    return batch_query_image, batch_label
Example #15
def read_tensors_from_csv(file_name, defaults=None, num_columns=None, batch_size=1, num_epochs=None,
                          delimiter=',', randomize_input=True, num_threads=4):
    if file_name is None:
        raise ValueError(
            "Invalid file_name. file_name cannot be empty.")

    if defaults is None and num_columns is None:
        raise ValueError(
            "At least one of defaults and num_columns should not be None.")

    if defaults is None:
        defaults = [0.0 for _ in range(num_columns)]

    record_defaults = [[item] for item in defaults]

    examples = tf.contrib.learn.read_batch_examples(
        file_pattern=file_name,
        batch_size=batch_size,
        reader=tf.TextLineReader,
        randomize_input=randomize_input,
        num_threads=num_threads,
        num_epochs=num_epochs)

    columns = tf.decode_csv(
        examples, record_defaults=record_defaults, field_delim=delimiter)

    return columns
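A hedged usage sketch for read_tensors_from_csv over a hypothetical three-column CSV (two floats and a string label); the path and column names are placeholders.

import tensorflow as tf

columns = read_tensors_from_csv("data/train.csv",
                                defaults=[0.0, 0.0, ""],
                                batch_size=64,
                                num_epochs=1)
# Each returned tensor has shape [batch_size]; give the columns names.
features = dict(zip(["x1", "x2", "label"], columns))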
def build_csv_serving_tensors_for_transform_step(analysis_path,
                                                 features,
                                                 schema,
                                                 stats,
                                                 keep_target):
  """Builds a serving function starting from raw csv.

  This should only be used by transform.py (the transform step).

  For image columns, the image should be a base64 string encoding the image.
  The output of this function will transform that image to a 2048 long vector
  using the inception model.
  """

  csv_header, record_defaults = csv_header_and_defaults(features, schema, stats, keep_target)

  placeholder = tf.placeholder(dtype=tf.string, shape=(None,),
                               name='csv_input_placeholder')
  tensors = tf.decode_csv(placeholder, record_defaults)
  raw_features = dict(zip(csv_header, tensors))

  transform_fn = make_preprocessing_fn(analysis_path, features, keep_target)
  transformed_tensors = transform_fn(raw_features)

  transformed_features = {}
  # Expand the dims of non-sparse tensors
  for k, v in six.iteritems(transformed_tensors):
    if isinstance(v, tf.Tensor) and v.get_shape().ndims == 1:
      transformed_features[k] = tf.expand_dims(v, -1)
    else:
      transformed_features[k] = v

  return input_fn_utils.InputFnOps(
      transformed_features, None, {"csv_example": placeholder})
Example #17
  def _decode_csv(line):
    """Takes the string input tensor and parses it to feature dict and target.

    All the columns except the first one are treated as feature columns. The
    first column is expected to be the target.
    The target is only returned if with_target is True.

    Args:
      line: csv rows in tensor format.

    Returns:
      features: A dictionary of features with key as "column_names" from
        self._column_header.
      target: tensor of target values which is the first column of the file.
        This will only be returned if with_target==True.
    """
    column_header = column_names if with_target else column_names[:4]
    record_defaults = [[0.] for _ in xrange(len(column_names) - 1)]
    # Pass label as integer.
    if with_target:
      record_defaults.append([0])
    columns = tf.decode_csv(line, record_defaults=record_defaults)
    features = dict(zip(column_header, columns))
    target = features.pop(column_names[4]) if with_target else None
    return features, target
 def decode_csv(line):
     parsed_line = tf.decode_csv(line, [[0.], [0.], [0.], [0.], [0]])
     label = parsed_line[-1:]  
     del parsed_line[-1]  
     features = parsed_line  
     d = dict(zip(feature_names, features)), label
     return d
Example #19
  def _input_fn():
    BATCH_SIZE = 40
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TextLineReader()
    key, value = reader.read_up_to(filename_queue, num_records=BATCH_SIZE)

    record_defaults = [[0], [" "], [0], [" "], [0],
                       [" "], [" "], [" "], [" "], [" "],
                       [0], [0], [0], [" "], [" "]]
    columns = tf.decode_csv(
        value, record_defaults=record_defaults)

    features = dict(zip(COLUMNS, columns))

    # save our label
    income_bracket = features.pop('income_bracket')
    
    # remove the fnlwgt key, which is not used
    features.pop('fnlwgt', 'fnlwgt key not found')

    # works in 0.12 only
    for feature_name in CATEGORICAL_COLUMNS:
      features[feature_name] = tf.expand_dims(features[feature_name], -1)

    income_int = tf.to_int32(tf.equal(income_bracket, " >50K"))

    return features, income_int
def parse_record(record):
    columns = tf.decode_csv(record, record_defaults=commons.HEADER_DEFAULTS, field_delim='\t')
    features = columns[0]
    target = columns[1:]
    target = tf.cast(tf.string_to_number(target), dtype=tf.int32)
    target = tf.stack(target, axis=0)
    return {commons.FEATURE_COL: features}, target
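A hedged sketch of wrapping parse_record in a tf.data input_fn over a tab-separated file; the path, batch size and epoch count are assumptions, and commons.* comes from the snippet's module.

import tensorflow as tf

def input_fn(file_path, batch_size=64, num_epochs=1):
    dataset = (tf.data.TextLineDataset(file_path)
               .map(parse_record)
               .repeat(num_epochs)
               .batch(batch_size))
    return dataset.make_one_shot_iterator().get_next()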
Example #21
def get_input(input_file, batch_size, im_size=224):
    input = DATA_DIR + 'SegNet/SiftFlow/' + input_file
    filenames = []
    with open(input, 'r') as f:
        for line in f:
            filenames.append('{}/{}'.format(DATA_DIR, line.strip()))
    # filenames.append('{}/{}.jpg {}'.format(
    #     DATA_DIR, line.strip(),
    #     line.strip()))

    filename_queue = tf.train.string_input_producer(filenames)
    filename, label_dir = tf.decode_csv(filename_queue.dequeue(), [[""], [""]], " ")

    label = label_dir

    file_contents = tf.read_file(filename)
    im = tf.image.decode_jpeg(file_contents)
    im = tf.image.resize_images(im, im_size, im_size, method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    im = tf.reshape(im, [im_size, im_size, 3])
    im = tf.to_float(im)
    im_mean = tf.constant([122.67892, 116.66877, 104.00699], dtype=tf.float32)
    im = tf.sub(im, im_mean)
    # im = tf.image.per_image_whitening(im)
    # im = tf.image.per_image_whitening(im)
    min_queue_examples = int(10000 * 0.4)
    example_batch, lbl_batch = tf.train.batch([im, label],
                                            num_threads=1,
                                            batch_size=batch_size,
                                            capacity=min_queue_examples + 3 * batch_size)
    return example_batch, lbl_batch
Example #22
 def read(filename_queue):
   value = filename_queue.dequeue()
   fpath, label = tf.decode_csv(
       value, record_defaults=[[''], ['']],
       field_delim=' ')
   image_buffer = tf.read_file(fpath)
   return [image_buffer, label]
Example #23
def _decode(example_batch):
    """Decode a batch of CSV lines into a feature map."""

    if FLAGS.is_predicting:
        record_defaults = [[0.0], [""], [0.0], [""], [0.0], [""], [""], [""],
                           [""], [""], [0.0], [0.0], [0.0], [""]]
    else:
        record_defaults = [[0.0], [""], [0.0], [""], [0.0], [""], [""], [""],
                           [""], [""], [0.0], [0.0], [0.0], [""], [""]]

    fields = tf.decode_csv(example_batch, record_defaults, field_delim=',')
    if FLAGS.is_predicting:
        data = {LABEL: tf.constant("")}
    else:
        data = {LABEL: fields[14]}

    data["age"] = fields[0]
    data["workclass"] = fields[1]
    data["fnlwgt"] = fields[2]
    data["education"] = fields[3]
    data["education-num"] = fields[4]
    data["marital-status"] = fields[5]
    data["occupation"] = fields[6]
    data["relationship"] = fields[7]
    data["race"] = fields[8]
    data["sex"] = fields[9]
    data["capital-gain"] = fields[10]
    data["capital-loss"] = fields[11]
    data["hours-per-week"] = fields[12]
    data["native-country"] = fields[13]

    return data
Example #24
def parse_csv(csv_row, is_serving=False):
  """Takes the string input tensor (csv) and returns a dict of rank-2 tensors.

  Takes a rank-1 tensor and converts it into rank-2 tensor, with respect to
  its data type (inferred from the metadata).

  Args:
    csv_row: rank-2 tensor of type string (csv).
    is_serving: boolean to indicate whether this function is called during
      serving or training, since the csv_row serving input is different than
      the training input (i.e., no target column).
  Returns:
    rank-2 tensor of the correct data type.
  """
  if is_serving:
    column_names = metadata.SERVING_COLUMN_NAMES
    defaults = []
    # create the defaults for the serving columns.
    for serving_feature in metadata.SERVING_COLUMN_NAMES:
      feature_index = metadata.COLUMN_NAMES.index(serving_feature)
      defaults.append(metadata.DEFAULTS[feature_index])
  else:
    column_names = metadata.COLUMN_NAMES
    defaults = metadata.DEFAULTS

  columns = tf.decode_csv(csv_row, record_defaults=defaults)
  features = dict(zip(column_names, columns))

  return features
Example #25
  def raw_training_input_fn():
    """Training input function that reads raw data and applies transforms."""

    if isinstance(raw_data_file_pattern, six.string_types):
      filepath_list = [raw_data_file_pattern]
    else:
      filepath_list = raw_data_file_pattern

    files = []
    for path in filepath_list:
      files.extend(file_io.get_matching_files(path))

    filename_queue = tf.train.string_input_producer(
        files, num_epochs=num_epochs, shuffle=randomize_input)

    csv_id, csv_lines = tf.TextLineReader().read_up_to(filename_queue, training_batch_size)

    queue_capacity = (reader_num_threads + 3) * training_batch_size + min_after_dequeue
    if randomize_input:
      _, batch_csv_lines = tf.train.shuffle_batch(
          tensors=[csv_id, csv_lines],
          batch_size=training_batch_size,
          capacity=queue_capacity,
          min_after_dequeue=min_after_dequeue,
          enqueue_many=True,
          num_threads=reader_num_threads,
          allow_smaller_final_batch=allow_smaller_final_batch)

    else:
      _, batch_csv_lines = tf.train.batch(
          tensors=[csv_id, csv_lines],
          batch_size=training_batch_size,
          capacity=queue_capacity,
          enqueue_many=True,
          num_threads=reader_num_threads,
          allow_smaller_final_batch=allow_smaller_final_batch)

    csv_header, record_defaults = csv_header_and_defaults(features, schema, stats, keep_target=True)
    parsed_tensors = tf.decode_csv(batch_csv_lines, record_defaults, name='csv_to_tensors')
    raw_features = dict(zip(csv_header, parsed_tensors))

    transform_fn = make_preprocessing_fn(analysis_output_dir, features, keep_target=True)
    transformed_tensors = transform_fn(raw_features)

    # Expand the dims of non-sparse tensors. This is needed by tf.learn.
    transformed_features = {}
    for k, v in six.iteritems(transformed_tensors):
      if isinstance(v, tf.Tensor) and v.get_shape().ndims == 1:
        transformed_features[k] = tf.expand_dims(v, -1)
      else:
        transformed_features[k] = v

    # Remove the target tensor, and return it directly
    target_name = get_target_name(features)
    if not target_name or target_name not in transformed_features:
      raise ValueError('Cannot find target transform in features')

    transformed_target = transformed_features.pop(target_name)

    return transformed_features, transformed_target
Example #26
def parse_example_tensor(examples, train_config, keep_target):
  """Read the csv files.

  Args:
    examples: string tensor
    train_config: training config
    keep_target: if true, the target column is expected to exist and it is
        returned in the features dict.

  Returns:
    Dict of feature_name to tensor. Target feature is in the dict.
  """

  csv_header = []
  if keep_target:
    csv_header = train_config['csv_header']
  else:
    csv_header = [name for name in train_config['csv_header']
                  if name != train_config['target_column']]

  # record_defaults are used by tf.decode_csv to insert defaults, and to infer
  # the datatype.
  record_defaults = [[train_config['csv_defaults'][name]]
                     for name in csv_header]
  tensors = tf.decode_csv(examples, record_defaults, name='csv_to_tensors')

  # I'm not really sure why expand_dims needs to be called. If using regression
  # models, it errors without it.
  tensors = [tf.expand_dims(x, axis=1) for x in tensors]

  tensor_dict = dict(zip(csv_header, tensors))
  return tensor_dict
Example #27
def batch_producer(filepath, n_classes, **kwargs):
    """Function for loading batches of images and
    and labels from a csv *without* a header. CSV files
    must be in the format of
        class_code,/abs/path/to/img
        class_code,/abs/path/to/img
        class_code,/abs/path/to/img

    Parameters
    -----------
    filepath : list
        list of paths to csv files. Even if just using one file, it must
        be a list. For example ['/path/to/file.csv']
    n_classes : int
        number of classes to be used in one-hot encoding
    batch_size : (kwarg) int
        number of samples per batch. Default is 4
    epochs : (kwarg) int
        number of epochs to run. Default is 70
    image_shape : (kwarg) tuple
        shape of the image. Must be in the form of (H,W,C). The image
        will *not* be resized; the value is only used to set
        the shape for the batch queue. Default is (224, 224, 3)
    is_training : (kwarg) bool
        when set to true, the loader will apply image transformations.
        Default is True
    num_threads : (kwarg) int
        number of threads to use for the loader. Default is 4
    """
    batch_size = kwargs.pop("batch_size", 4)
    img_shape = kwargs.pop("image_shape", (224, 224, 3))
    num_threads = kwargs.pop("num_threads", 4)
    epochs = kwargs.pop("epochs", 70)
    is_training = kwargs.pop("is_training", True)

    # loads a series of text files
    filename_queue = tf.train.string_input_producer(filepath, num_epochs=epochs)

    # used to read each text file line by line
    reader = tf.TextLineReader()

    # actually parse the text file. returns idx, content
    _, record = reader.read(filename_queue)

    # split out the csv. Defaults to returning strings.
    img_class, fname = tf.decode_csv(record, record_defaults=[[1], [""]])

    img_content = read_one_image(fname, is_training=is_training, image_shape=img_shape)

    # load batches of images all multithreaded like
    class_batch, img_batch = tf.train.shuffle_batch([img_class, img_content],
                                                    batch_size=batch_size,
                                                    capacity=batch_size * 4,
                                                    num_threads=num_threads,
                                                    min_after_dequeue=batch_size * 2)

    one_hot_classes = tf.one_hot(class_batch, depth=n_classes,
                                 on_value=1.0, off_value=0.0)
    return one_hot_classes, img_batch
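A hedged call sketch for batch_producer using its documented keyword arguments; the CSV path is a placeholder and all sizes are illustrative.

one_hot_classes, img_batch = batch_producer(
    ["/data/train_list.csv"],      # rows of: class_code,/abs/path/to/img
    n_classes=5,
    batch_size=8,
    epochs=10,
    image_shape=(224, 224, 3),
    num_threads=2,
    is_training=True)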
def read_data(filename_queue):
    reader = tf.TextLineReader()
    key, record_string = reader.read(filename_queue)
    record_defaults = [[1], [1], [1]]
    col1, col2, col3 = tf.decode_csv(record_string, record_defaults=record_defaults)
    features = tf.pack([col1, col2])
    label = col3
    return features, label
def read_mnist_csv(filename_queue):
  reader = tf.TextLineReader(skip_header_lines=1)
  key, value = reader.read(filename_queue)
  record_defaults = [[0]for row in range(785)]
  cols = tf.decode_csv( value, record_defaults=record_defaults)
  features = tf.stack(cols[1:])
  label = tf.stack([cols[0]])
  return features, label
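A hedged sketch of normalizing and batching the tensors from read_mnist_csv; the CSV path and batch size are placeholders.

import tensorflow as tf

filename_queue = tf.train.string_input_producer(["mnist_train.csv"])
features, label = read_mnist_csv(filename_queue)

features = tf.cast(features, tf.float32) / 255.0        # scale pixels to [0, 1]
label_onehot = tf.one_hot(tf.squeeze(label), depth=10)  # 10 digit classes
feature_batch, label_batch = tf.train.batch(
    [features, label_onehot], batch_size=128)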
Example #30
def read_from_csv(filename_queue):
    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)
    record_defaults = [[""],[0]]
    image_path, label = tf.decode_csv(value, field_delim=" ", record_defaults=record_defaults)
    print("imagepath is: ", image_path)
    image = tf.image.decode_jpeg(tf.read_file(image_path), channels=3)
    return image, label 
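Images decoded by read_from_csv have a data-dependent shape, so they need a fixed size before batching; a hedged sketch with an illustrative resolution and a placeholder list file.

import tensorflow as tf

filename_queue = tf.train.string_input_producer(["image_list.txt"])  # "path label" per line
image, label = read_from_csv(filename_queue)

image = tf.image.resize_images(image, [224, 224])
image.set_shape([224, 224, 3])  # static shape required by tf.train.batch
image_batch, label_batch = tf.train.batch([image, label], batch_size=32)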
        gpu_options=tf.GPUOptions(allow_growth=True, visible_device_list='1'))

start_time = time.time()
print("start time : " + str(start_time))

with tf.name_scope('LoadImage'):
    csv_name = a.csv_name
    #csv_name = "/home/zhaoyin-t/plant_disease/traindata_int_small_random.csv" #合成あり
    #csv_name = "/home/zhaoyin-t/plant_disease/traindata_seg_int.csv" #セグメンテーション
    filename_queue = tf.train.string_input_producer([csv_name], shuffle=True)
    reader = tf.TextLineReader()
    _, val = reader.read(filename_queue)
    record_defaults = [["a"], ["a"], [0]]
    #record_defaults = [["a"],[0], [0], [0]]
    #path, _, label = tf.decode_csv(val, record_defaults=record_defaults)
    path, _, label = tf.decode_csv(val, record_defaults=record_defaults)
    readfile = tf.read_file(path)
    image = tf.image.decode_jpeg(readfile, channels=3)
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    image = tf.cast(image, dtype=tf.float32)

    height, width, ch = image.get_shape()
    # transform params
    CROP_SIZE = 256
    SCALE_SIZE = 286
    rot90_times = tf.random_uniform([1], 0, 5, dtype=tf.int32)[0]
    crop_offset = tf.cast(tf.floor(
        tf.random_uniform([2], 0, SCALE_SIZE - CROP_SIZE + 1, seed=seed)),
                          dtype=tf.int32)

    def transform(img,
Example #32

csv_file_names = Get_files_path()
# 1) create a FIFO queue #
filename_queue = tf.train.string_input_producer(csv_file_names)
print('filename_queue: ', filename_queue)
# 1.1) Reader
reader = tf.TextLineReader()
print('reader: ', reader)
key, value = reader.read(filename_queue)
# 1.2) Parse line
record_defaults = [[] for i in range(785)]  # note: tf.decode_csv takes record_defaults as one single-element list per column; [] marks a required column
print('record_defaults: ', record_defaults)
parse_record_op = tf.decode_csv(value,
                                record_defaults=record_defaults,
                                field_delim=',')  # return a list #
print('parse_record: ', len(parse_record_op))
#feature        = parse_record[1:]
#label          = parse_record[0]

# 2) Paras define
random_par = tf.Variable(
    tf.random_normal(shape=(2, 3), mean=0, stddev=1.0, dtype=tf.float32))
zeros_par = tf.Variable(tf.zeros(shape=(2, 3), dtype=tf.float32))

init_op = tf.global_variables_initializer()

# 3) Session run
gpu_options = tf.GPUOptions(allow_growth=True)
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
 def parse(line):
     fields = tf.decode_csv(line, [[0.0], [0.0]], field_delim="\t")
     return {"x": fields[0]}, fields[1]
Example #34
 def parse_csv(value):
     print('Parsing', data_file)
     columns = tf.decode_csv(value, record_defaults=_CSV_COLUMN_DEFAULTS)
     features = dict(zip(_CSV_COLUMNS, columns))
     labels = features.pop('income_bracket')
     return features, tf.equal(labels, '>50K')
    captcha_dir = captcha_dir + "labels.csv"

    # Construct the file queue
    file_queue = tf.train.string_input_producer([captcha_dir], shuffle=False)

    # Create a text line reader
    reader = tf.TextLineReader()

    # Read the label data from the file queue
    key, value = reader.read(file_queue)

    # Decode the CSV data.
    # records specifies the column layout and data types: [1] marks an integer
    # column; a column holding decimals should use [1.0] instead.
    records = [[1], ["None"]]
    number, label = tf.decode_csv(value, record_defaults=records)

    # Batch the label data
    label_batch = tf.train.batch([label], batch_size=6000, num_threads=1, capacity=6000)

    return label_batch


def dealwuthlabel(label_str):
    """

    :param label_str:
    :return:
    """
    # Characters that can appear in the verification code
    letter = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
Example #36
my_matrix = np.loadtxt(open("../data/d_train_20180102.csv", "rb"),
                       dtype=data_tpye,
                       delimiter=",",
                       skiprows=0)

filename_queue = tf.train.string_input_producer([
    "../data/d_train_20180102.csv",
])
# read one line at a time
print(my_matrix[0][41])
reader = tf.TextLineReader(skip_header_lines=1)
key, value = reader.read(filename_queue)
# parse one line at a time; fields are comma-separated by default
record_defaults = list(data_tpye1)
col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11, col12, col13, col14, col15, col16, col17, col18, col19, col20, col21, col22, col23, col24, col25, col26, col27, col28, col29, col30, col31, col32, col33, col34, col35, col36, col37, col38, col39, col40, col41, col42 = tf.decode_csv(
    value, record_defaults=record_defaults)
features = tf.stack([col1])

init_op = tf.global_variables_initializer()
local_init_op = tf.local_variables_initializer()

number_1 = []
age_1 = []
number_2 = []
tang_1 = []
for i in range(30):
    ii = rd.randint(1, 5642)
    number_1 = number_1 + [my_matrix[ii][0]]
    print(number_1)
    age_1 = age_1 + [my_matrix[ii][2]]
    number_2 = number_2 + [my_matrix[ii][8]]
import tensorflow as tf

filename_queue = tf.train.string_input_producer([
  "hdfs://127.0.0.1:39000/linear/training.csv",
  "hdfs://127.0.0.1:39000/linear/validation.csv",
])

reader = tf.TextLineReader()
key, value = reader.read(filename_queue)

x_observed, y_pred = tf.decode_csv(value, [[0.0],[0.0]])

with tf.Session() as sess:
  # Start populating the filename queue.
  coord = tf.train.Coordinator()
  threads = tf.train.start_queue_runners(coord=coord)
  try:
    for i in range(20):
      # Retrieve and print a single instance:
      example, label = sess.run([x_observed, y_pred])
      print(example, label)
  except tf.errors.OutOfRangeError:
    print("Done!")
  finally:
    coord.request_stop()
    coord.join(threads)
Example #38
 def decode_csv(row):
     cols = tf.decode_csv(row,
                          record_defaults=ORDERED_TRAINING_DEFAULTS)
     features = dict(zip(ORDERED_TRAINING_COLUMNS, cols))
     return features
Example #39
def main():
    file_queue = tf.train.string_input_producer(['/data/kaggle/train.csv'],
                                                shuffle=False)

    reader = tf.TextLineReader()
    key, value = reader.read(file_queue)
    value = tf.decode_csv(value, [[1.]] * 66)
    gender, age, TotalGV, Intracranial_volume = value[:4]
    DKT = tf.cast(value[4:], tf.float32)
    label = tf.one_hot(tf.cast(gender - 1, tf.int32), 2)

    x_batch, label_batch = tf.train.shuffle_batch([DKT, label], 121, 484, 0, 1)

    batch_queue = tf.contrib.slim.prefetch_queue.prefetch_queue(
        [x_batch, label_batch], capacity=4)
    dequeue_op = batch_queue.dequeue()

    dropout_rate = tf.placeholder(tf.float32)

    temp = tf.layers.dense(x_batch, 1024, activation=tf.nn.sigmoid)
    temp = tf.layers.dense(temp, 2048, activation=tf.nn.sigmoid)
    temp = tf.layers.dropout(temp, dropout_rate)
    y = tf.layers.dense(temp, 2)

    loss = tf.losses.softmax_cross_entropy(onehot_labels=label_batch, logits=y)
    train = tf.train.AdamOptimizer(0.001).minimize(loss)
    accuracy = tf.metrics.accuracy(labels=tf.argmax(label_batch, 1),
                                   predictions=tf.argmax(y, 1))[1]

    config = tf.ConfigProto(log_device_placement=True)
    config.gpu_options.per_process_gpu_memory_fraction = 1.0
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    saver = tf.train.Saver()
    init = (tf.global_variables_initializer(),
            tf.local_variables_initializer())

    tf.summary.scalar('loss', loss)
    tf.summary.scalar('accuracy', accuracy)
    merged = tf.summary.merge_all()
    logdir_train = "tensorboard_train/" + datetime.datetime.now().strftime(
        "%Y%m%d-%H%M%S") + "/"
    logdir_test = "tensorboard_test/" + datetime.datetime.now().strftime(
        "%Y%m%d-%H%M%S") + "/"
    writer_train = tf.summary.FileWriter(logdir_train, sess.graph)
    writer_test = tf.summary.FileWriter(logdir_test, sess.graph)

    sess.run(init)
    saver.restore(sess, log)

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess, coord)
    global_step = 0
    for i in range(10000):
        for j in range(3):
            _, summary, loss_train = sess.run([train, merged, loss],
                                              {dropout_rate: 0.5})
            print('step {}: loss = {}'.format(global_step, loss_train))
            writer_train.add_summary(summary, global_step)
            global_step += 1
        summary = sess.run(merged, {dropout_rate: 1})
        writer_test.add_summary(summary, global_step)
    saver.save(sess, log)
    coord.request_stop()
    coord.join(threads)
    print('complete')
Example #40
def _parse_infer_csv(line):
    cols_types = [['']] * 2
    columns = tf.decode_csv(line, record_defaults=cols_types, field_delim='\t')
    return columns
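A hedged sketch of applying _parse_infer_csv via tf.data at inference time; the tab-separated file path and batch size are placeholders.

import tensorflow as tf

infer_dataset = (tf.data.TextLineDataset("infer.tsv")
                 .map(_parse_infer_csv)
                 .batch(128))
infer_columns = infer_dataset.make_one_shot_iterator().get_next()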
    def raw_training_input_fn():
        """Training input function that reads raw data and applies transforms."""

        if isinstance(raw_data_file_pattern, six.string_types):
            filepath_list = [raw_data_file_pattern]
        else:
            filepath_list = raw_data_file_pattern

        files = []
        for path in filepath_list:
            files.extend(file_io.get_matching_files(path))

        filename_queue = tf.train.string_input_producer(
            files, num_epochs=num_epochs, shuffle=randomize_input)

        csv_id, csv_lines = tf.TextLineReader().read_up_to(
            filename_queue, training_batch_size)

        queue_capacity = (reader_num_threads +
                          3) * training_batch_size + min_after_dequeue
        if randomize_input:
            _, batch_csv_lines = tf.train.shuffle_batch(
                tensors=[csv_id, csv_lines],
                batch_size=training_batch_size,
                capacity=queue_capacity,
                min_after_dequeue=min_after_dequeue,
                enqueue_many=True,
                num_threads=reader_num_threads,
                allow_smaller_final_batch=allow_smaller_final_batch)

        else:
            _, batch_csv_lines = tf.train.batch(
                tensors=[csv_id, csv_lines],
                batch_size=training_batch_size,
                capacity=queue_capacity,
                enqueue_many=True,
                num_threads=reader_num_threads,
                allow_smaller_final_batch=allow_smaller_final_batch)

        csv_header, record_defaults = csv_header_and_defaults(features,
                                                              schema,
                                                              stats,
                                                              keep_target=True)
        parsed_tensors = tf.decode_csv(batch_csv_lines,
                                       record_defaults,
                                       name='csv_to_tensors')
        raw_features = dict(zip(csv_header, parsed_tensors))

        transform_fn = make_preprocessing_fn(analysis_output_dir,
                                             features,
                                             keep_target=True)
        transformed_tensors = transform_fn(raw_features)

        # Expand the dims of non-sparse tensors. This is needed by tf.learn.
        transformed_features = {}
        for k, v in six.iteritems(transformed_tensors):
            if isinstance(v, tf.Tensor) and v.get_shape().ndims == 1:
                transformed_features[k] = tf.expand_dims(v, -1)
            else:
                transformed_features[k] = v

        # image_feature_engineering does not need to be called as images are not
        # supported in raw csv for training.

        # Remove the target tensor, and return it directly
        target_name = get_target_name(features)
        if not target_name or target_name not in transformed_features:
            raise ValueError('Cannot find target transform in features')

        transformed_target = transformed_features.pop(target_name)

        return transformed_features, transformed_target
Example #42
filenames = ['data/text%d.txt'%i for i in range(1,4)]
filename_queue = tf.train.string_input_producer(filenames, capacity=3, shuffle=True, name='string_input_producer')

for f in filenames:
    if not tf.gfile.Exists(f):
        raise ValueError('Failed to find file: ' + f)
    else:
        print('File %s found.'%f)

reader = tf.TextLineReader()

key, value = reader.read(filename_queue, name='text_read_op')

record_defaults = [[-1.0], [-1.0],[-1.0], [-1.0],[-1.0], [-1.0],[-1.0], [-1.0],[-1.0], [-1.0]]

col1, col2, col3, col4, col5, col6, col7, col8, col9, col10 = tf.decode_csv(value, record_defaults=record_defaults)

features = tf.stack([col1, col2, col3, col4, col5, col6, col7, col8, col9, col10])

x = tf.train.shuffle_batch([features], batch_size=3, capacity=5, name='data_batch', min_after_dequeue=1, num_threads=1)

coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord, sess=session)

W = tf.Variable(tf.random_uniform(shape=[10,5], minval=-0.1, maxval=0.1, dtype=tf.float32), name='W')
b = tf.Variable(tf.zeros(shape=[5], dtype=tf.float32), name='b')

h = tf.nn.sigmoid(tf.matmul(x, W) + b)

tf.global_variables_initializer().run()
print("test")
Example #43
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf
from sklearn import metrics
import define_net_resp

hidden1=10
hidden2=10
hidden3=10
learning_rate=0.1
max_steps=6000

file_tt=str(sys.argv[1])
l_ex=file_len(file_tt)
filename_tt = tf.train.string_input_producer([file_tt],shuffle=False)
reader_tt = tf.TextLineReader(skip_header_lines=0)
_, csv_row_tt = reader_tt.read(filename_tt)
col1t, col2t, col3t, col4t, col5t, col6t, col7t, col8t, col9t, col10t,  col11t,  col12t,  col13t, col14t = tf.decode_csv(csv_row_tt, record_defaults=record_defaults)
features_tt = tf.pack([col1t, col2t, col3t, col4t, col5t, col6t, col7t, col8t, col9t, col10t,  col11t,  col12t,  col13t])

min_after_dequeue = 10000000
capacity = min_after_dequeue + define_net_resp.FEATURES * batch_size


images_batch,label_batch = tf.train.batch([features_tt,col14t-1],batch_size=batch_size,
                                                          capacity=capacity,num_threads=1)

def main(_):
    run_training()

if __name__ == '__main__':
    tf.app.run()
Example #44
# REF [site] >> https://www.tensorflow.org/programmers_guide/reading_data

import tensorflow as tf

#--------------------------------------------------------------------
# CSV file.

filename_queue = tf.train.string_input_producer(["file0.csv", "file1.csv"])

reader = tf.TextLineReader()
key, value = reader.read(filename_queue)

# Default values, in case of empty columns. Also specifies the type of the decoded result.
record_defaults = [[1], [1], [1], [1], [1]]
col1, col2, col3, col4, col5 = tf.decode_csv(value, record_defaults = record_defaults)
features = tf.stack([col1, col2, col3, col4])

with tf.Session() as sess:
	# Start populating the filename queue.
	coord = tf.train.Coordinator()
	threads = tf.train.start_queue_runners(coord = coord)

	for i in range(1200):
		# Retrieve a single instance:
		example, label = sess.run([features, col5])

	coord.request_stop()
	coord.join(threads)
Example #45
 def decode_csv(csv_row):
     filename, label = tf.decode_csv(records=csv_row,
                                     record_defaults=[[''], ['']])
     image_bytes = tf.read_file(filename=filename)
     return image_bytes, label
Example #46
def get_batch(param_dict=params.default_param_dict(), shuffled=True):
    """ uses data_batch_size, data_dir, data_mode, data_serialized """

    batch_size = param_dict["data_batch_size"]
    all_files = sorted(os.listdir(param_dict["data_dir"]))
    nthreads = 1

    # get data characteristics
    data_char_file = os.path.join(param_dict["data_dir"], "info.yml")
    with open(data_char_file) as infile:
        data_dict = yaml.safe_load(infile)
    print("### start load_data")
    print("data class_column: " + str(data_dict['class_column']))
    print("data num_metadata: " + str(data_dict['num_metadata']))
    print("data num_examples: " + str(data_dict['num_examples']))

    # get list of files
    filenames = []
    filesuffix = '.data'
    readtype = tf.TextLineReader
    if param_dict["data_serialized"]:
        filesuffix = '.tfrecord'
        readtype = tf.TFRecordReader
    for fname in all_files:
        if filesuffix in fname:
            filenames.extend([param_dict["data_dir"] + fname])
    nreaders = min(nthreads, len(filenames))
    print("data number of datafiles: " + str(len(filenames)))
    print("data example datafile: " + filenames[0])
    print("data batch_size: " + str(batch_size))
    print("data nreaders: " + str(nreaders))

    # read in example
    if shuffled:
        # Reads multiple records in parallel from data_sources using n readers.
        key, example = slim.parallel_reader.parallel_read(
            filenames,
            readtype,
            num_epochs=None,
            num_readers=nreaders,
            shuffle=True,
            dtypes=None,
            capacity=32 * batch_size,
            min_after_dequeue=16 * batch_size,
            seed=19850411,
            scope=None)
    else:
        # Reads sequentially the data_sources using the reader, doing a single pass.
        filename_queue = tf.train.string_input_producer(filenames,
                                                        shuffle=False)
        reader = readtype()
        key, example = reader.read(filename_queue)

    #print("data raw example size: " + str(example))

    # decode example into features, label and metadata
    if param_dict["data_serialized"]:
        parse_example = tf.parse_single_example(
            example,
            features={
                'feature_values':
                tf.FixedLenFeature([data_dict['class_column'] - 1],
                                   tf.float32),
                'label':
                tf.FixedLenFeature([1], tf.int64),
                'meta_values':
                tf.FixedLenFeature([data_dict['num_metadata']], tf.string)
            })
        features = tf.cast(parse_example['feature_values'], tf.float32)
        label = tf.cast(parse_example['label'], tf.int32)
        metadata = tf.cast(parse_example['meta_values'], tf.string)
    else:
        record_defaults = [[1.0]
                           for dim in range(data_dict['class_column'] - 1)]
        record_defaults.extend([[1]])
        record_defaults.extend([['str']
                                for dim in range(data_dict['num_metadata'])])
        print("data record_defaults length: " + str(len(record_defaults)))
        reader = tf.decode_csv(records=example,
                               record_defaults=record_defaults,
                               field_delim="\t")
        #print("data size of reader: " + str(reader))
        #tf.decode_csv() from slim.parallel_reader.parallel_read() returns tensors
        #with <unknown> shape.
        #This shape needs to be casted to () to be used with tf.train.batch()
        reshaped_reader = []
        for tensor in reader:
            reshaped_reader.append(tf.reshape(tensor, []))
        #print("data size of reshaped_reader: " + str(reshaped_reader))
        features = reshaped_reader[0:data_dict['class_column'] - 1]
        label = reshaped_reader[data_dict['class_column'] -
                                1:data_dict['class_column']]
        metadata = reshaped_reader[data_dict['class_column']:(
            data_dict['class_column'] + data_dict['num_metadata'])]
    #print("data size of features: " + str(features))
    #print("data size of label: " + str(label))
    #print("data size of metadata: " + str(metadata))

    # reformat example features
    label = tf.squeeze(label)
    input_size = data_dict['class_column'] - 1
    if param_dict['data_mode'] == 'diff':
        input_size = int((input_size) / 2)
        features = (tf.slice(features, [input_size], [input_size]) -
                    tf.slice(features, [0], [input_size]))
    elif param_dict['data_mode'] == 'exp_only':
        input_size = int((input_size) / 2)
        features = tf.slice(features, [input_size], [input_size])
    #features.set_shape([input_size])
    param_dict['data_input_size'] = input_size
    print("data orig input_size: " + str(data_dict['class_column'] - 1))
    print("data final input_size: " + str(input_size))

    # create batch
    if shuffled:
        feat_b, label_b, meta_b = tf.train.shuffle_batch(
            [features, label, metadata],
            batch_size=batch_size,
            num_threads=nthreads,
            capacity=32 * batch_size,
            min_after_dequeue=16 * batch_size,
            seed=19850411,
            allow_smaller_final_batch=True)
    else:
        feat_b, label_b, meta_b = tf.train.batch(
            [features, label, metadata],
            batch_size=batch_size,
            num_threads=1,
            capacity=batch_size,
            allow_smaller_final_batch=True)

    return feat_b, label_b, meta_b, input_size, data_dict[
        'num_metadata'], data_dict['num_examples']
Example #47
def parse_row(row):
    fields = tf.decode_csv(records=row, record_defaults=CSV_DEFAULTS)
    features = dict(zip(CSV_COLUMN_NAMES, fields))
    label = features.pop("fare_amount")
    return features, label
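A hedged sketch of feeding parse_row through a dataset built from several CSV files; the file pattern, the header skipping, and the batch size are assumptions, and CSV_DEFAULTS/CSV_COLUMN_NAMES come from the snippet's module.

import tensorflow as tf

dataset = (tf.data.Dataset.list_files("train-*.csv")
           .flat_map(lambda f: tf.data.TextLineDataset(f).skip(1))  # skip a header line
           .map(parse_row)
           .batch(512))
features, label = dataset.make_one_shot_iterator().get_next()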
Example #48
def parser_csv(line):
    parsed_line = tf.decode_csv(line, [['string'], ['tf.int64'], ['tf.int64']])
    label = parsed_line[-1]
    augment = parsed_line[1]
    return parsed_line[0], augment, label
Example #49
    def load_train_batch(self):
        """Load a batch of training instances.
        """
        opt = self.opt

        # Load the list of training files into queues
        #TODO
        if opt.train_lite:
            file_list = self.format_file_list(opt.dataset_dir,
                                              opt.filelist_dir, 'train_lite')
        else:
            file_list = self.format_file_list(opt.dataset_dir,
                                              opt.filelist_dir, 'train')
        image_paths_queue = tf.train.string_input_producer(
            file_list['image_file_list'], shuffle=False)
        cam_paths_queue = tf.train.string_input_producer(
            file_list['cam_file_list'], shuffle=False)

        # Load camera intrinsics
        cam_reader = tf.TextLineReader()
        _, raw_cam_contents = cam_reader.read(cam_paths_queue)
        rec_def = []
        for i in range(9):
            rec_def.append([1.])
        raw_cam_vec = tf.decode_csv(raw_cam_contents, record_defaults=rec_def)
        raw_cam_vec = tf.stack(raw_cam_vec)
        intrinsics = tf.reshape(raw_cam_vec, [3, 3])

        # Load images
        img_reader = tf.WholeFileReader()
        _, image_contents = img_reader.read(image_paths_queue)
        image_seq = tf.image.decode_jpeg(image_contents)
        tgt_image, src_image_stack = \
            self.unpack_image_sequence(
                image_seq, opt.img_height, opt.img_width, opt.num_source)

        #TODO Load Semantics
        #     See cityscape label defs in https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/helpers/labels.py#L62
        #     Also notice that deeplabv3+ uses `train_id` https://github.com/tensorflow/models/blob/69b016449ffc797421bf003d8b7fd8545db866d7/research/deeplab/datasets/build_cityscapes_data.py#L46
        #     Color maps are in https://github.com/tensorflow/models/blob/69b016449ffc797421bf003d8b7fd8545db866d7/research/deeplab/utils/get_dataset_colormap.py#L207
        if opt.sem_assist:
            sem_paths_queue = tf.train.string_input_producer(
                file_list['sem_image_file_list'], shuffle=False)
            sem_reader = tf.WholeFileReader()
            sem_keys, sem_contents = sem_reader.read(sem_paths_queue)

            if opt.load_from_raw:
                sem_seq = tf.reshape(
                    tf.decode_raw(sem_contents, tf.uint8),
                    [1, opt.img_height, (opt.num_source + 1) * opt.img_width])
            else:
                sem_seq = tf.py_func(read_npy_file, [sem_keys], [
                    tf.uint8,
                ])

        #TODO Load Instances: we use COCO
        #     Two channels: class and id level. For id level we only use the edge
        if opt.ins_assist:
            ins_paths_queue = tf.train.string_input_producer(
                file_list['ins_image_file_list'], shuffle=False)
            ins_reader = tf.WholeFileReader()
            ins_keys, ins_contents = ins_reader.read(ins_paths_queue)

            if opt.load_from_raw:
                ins_seq = tf.reshape(tf.decode_raw(ins_contents, tf.uint8), [
                    1, opt.img_height, (opt.num_source + 1) * opt.img_width, 2
                ])
            else:
                ins_seq = tf.py_func(read_npy_file, [ins_keys], [
                    tf.uint8,
                ])

        #TODO 1. SHUFFLE BATCH
        # Form training batches
        seed = random.randint(0, 2**31 - 1)
        min_after_dequeue = 2048
        capacity = min_after_dequeue + opt.num_threads * opt.batch_size

        if opt.sem_assist and opt.ins_assist:
            src_image_stack, tgt_image, intrinsics, sem_seq, ins_seq = tf.train.shuffle_batch(
                [
                    src_image_stack, tgt_image, intrinsics, sem_seq[0],
                    ins_seq[0]
                ], opt.batch_size, capacity, min_after_dequeue,
                opt.num_threads, seed)

        elif opt.sem_assist:
            src_image_stack, tgt_image, intrinsics, sem_seq = tf.train.shuffle_batch(
                [src_image_stack, tgt_image, intrinsics, sem_seq[0]],
                opt.batch_size, capacity, min_after_dequeue, opt.num_threads,
                seed)

        elif opt.ins_assist:
            src_image_stack, tgt_image, intrinsics, ins_seq = tf.train.shuffle_batch(
                [src_image_stack, tgt_image, intrinsics, ins_seq[0]],
                opt.batch_size, capacity, min_after_dequeue, opt.num_threads,
                seed)

        else:
            src_image_stack, tgt_image, intrinsics = tf.train.shuffle_batch(
                [src_image_stack, tgt_image, intrinsics], opt.batch_size,
                capacity, min_after_dequeue, opt.num_threads, seed)

        # semantic segmentation
        tgt_sem = None
        tgt_sem_map = None
        tgt_sem_mask = None
        tgt_sem_edge = None
        src_sem_stack = None
        src_sem_map_stack = None
        src_sem_mask_stack = None
        src_sem_edge_stack = None

        # ins0 ~ instance level, but still class segmentation
        tgt_ins0 = None
        tgt_ins0_map = None
        tgt_ins0_edge = None
        src_ins0_stack = None
        src_ins0_map_stack = None
        src_ins0_edge_stack = None

        # ins1 ~ instance level, but this is id segmentation
        tgt_ins1_edge = None
        src_ins1_edge_stack = None

        #TODO 2. TRANSFORMATION AND UNPACKING
        if opt.sem_assist:
            #TODO get one-hot encoded         sem_oh_seq (4,128,1248,19)X{0,1}
            sem_oh_seq = tf.cast(
                tf.one_hot(sem_seq, on_value=1, depth=opt.sem_num_class),
                tf.uint8)
            #TODO decouple   tgt_sem (4,128,1248,19)X{0,1}   src_sem_stack (4,128,1248,2*19)X{0,1}
            tgt_sem, src_sem_stack = self.unpack_sem_sequence_batch_atom(
                sem_oh_seq, opt.sem_num_class)

            #TODO get densemap     sem_map_seq (4,128,1248,1)X{0,1,...,18}
            sem_map_seq = tf.expand_dims(sem_seq, -1)
            #TODO decouple   tgt_sem_map (4,128,1248,1)X{0,1,...,18}   src_sem_map_stack (4,128,1248,2*1)X{0,1,...,18}
            tgt_sem_map, src_sem_map_stack = self.unpack_sem_sequence_batch_atom(
                sem_map_seq, 1)

            if opt.sem_mask_explore:
                #TODO get sem mask   sem_mask_seq (4,128,1248,c) here we assume c=1
                sem_mask_seq = self.get_sem_mask_batch(sem_seq)
                #TODO decouple   tgt_sem_mask (4,128,1248,c)   src_sem_mask_stack (4,128,1248,2*c)
                tgt_sem_mask, src_sem_mask_stack = self.unpack_sem_sequence_batch_atom(
                    sem_mask_seq, 1)

            if opt.sem_edge_explore:
                #TODO get sem edge   sem_edge_seq (4,128,1248,c) here we assume c=1
                sem_edge_seq = self.get_sem_edge_batch(sem_seq)
                #TODO decouple   tgt_sem_edge (4,128,1248,c)   src_sem_edge_stack (4,128,1248,2*c)
                tgt_sem_edge, src_sem_edge_stack = self.unpack_sem_sequence_batch_atom(
                    sem_edge_seq, 1)

        if opt.ins_assist:
            ins0_seq = ins_seq[:, :, :, 0]
            ins1_seq = ins_seq[:, :, :, 1]

            #TODO get one-hot  ins0_oh_seq (4,128,1248,81)X{0,1}
            ins0_oh_seq = tf.cast(
                tf.one_hot(ins0_seq, on_value=1, depth=opt.ins_num_class),
                tf.uint8)
            #ins1_oh_seq = tf.cast(tf.one_hot(ins1_seq, on_value=1, depth = 255), tf.uint8)

            #TODO decouple   tgt_ins0 (4,128,1248,81)X{0,1}   src_ins0_stack (4,128,1248,2*81)X{0,1}
            tgt_ins0, src_ins0_stack = self.unpack_sem_sequence_batch_atom(
                ins0_oh_seq, opt.ins_num_class)
            #tgt_ins1, src_ins1_stack = self.unpack_sem_sequence_batch_atom(ins1_oh_seq, opt.ins_num_class)

            #TODO get densemap  ins0_map_seq (4,128,1248,1)X{0,1,...,80}
            ins0_map_seq = ins_seq[:, :, :, :1]
            ins1_map_seq = ins_seq[:, :, :, 1:]

            #TODO decouple  tgt_ins0_map (4,128,1248,1)X{0,1,...,80}  src_ins0_map_stack (4,128,1248,2*1)X{0,1,...,80}
            tgt_ins0_map, src_ins0_map_stack = self.unpack_sem_sequence_batch_atom(
                ins0_map_seq, 1)
            tgt_ins1_map, src_ins1_map_stack = self.unpack_sem_sequence_batch_atom(
                ins1_map_seq, 1)

            if opt.ins0_edge_explore:
                #TODO get edge   ins0_edge_seq (4,128,1248,c)  here we assume c=1
                ins0_edge_seq = self.get_sem_edge_batch(ins0_seq)
                #TODO decouple   tgt_ins0_edge (4,128,1248,c)  src_ins0_edge_stack (4,128,1248,2*c)
                tgt_ins0_edge, src_ins0_edge_stack = self.unpack_sem_sequence_batch_atom(
                    ins0_edge_seq, 1)

            if opt.ins1_edge_explore:
                #TODO get edge   ins1_edge_seq (4,128,1248,c) here we assume c=1
                ins1_edge_seq = self.get_sem_edge_batch(ins1_seq)
                #TODO decouple   tgt_ins1_edge (4,128,1248,c)   src_ins1_edge_stack (4,128,1248,2*c)
                tgt_ins1_edge, src_ins1_edge_stack = self.unpack_sem_sequence_batch_atom(
                    ins1_edge_seq, 1)

        #TODO 3. DATA AUGMENTATION
        image_all = tf.concat([tgt_image, src_image_stack], axis=3)
        image_all, intrinsics, aug_params = self.data_augmentation(
            image_all, intrinsics, opt.img_height,
            opt.img_width)  #TODO changed API

        if opt.sem_assist:
            ##TODO Do the same data augmentation for semantic segmentations
            tgt_sem, src_sem_stack = self.data_aug(tgt_sem, src_sem_stack,
                                                   aug_params, "bilinear")
            tgt_sem_map, src_sem_map_stack = self.data_aug(
                tgt_sem_map, src_sem_map_stack, aug_params, "neighbor")

            if self.opt.sem_mask_explore:
                tgt_sem_mask, src_sem_mask_stack = \
                    self.data_aug(tgt_sem_mask, src_sem_mask_stack, aug_params, "bilinear")

            if self.opt.sem_edge_explore:
                tgt_sem_edge, src_sem_edge_stack = \
                    self.data_aug(tgt_sem_edge, src_sem_edge_stack, aug_params, "bilinear")
                #TODO maybe transfer needs this setting: self.data_aug(tgt_sem_edge, src_sem_edge_stack, aug_params, "neighbor")

        if opt.ins_assist:
            ##TODO Do the same data augmentation for instance segmentations
            tgt_ins0, src_ins0_stack = self.data_aug(tgt_ins0, src_ins0_stack,
                                                     aug_params, "bilinear")
            #tgt_ins1, src_ins1_stack = self.data_aug(tgt_ins1, src_ins1_stack, aug_params, "bilinear")

            tgt_ins0_map, src_ins0_map_stack = self.data_aug(
                tgt_ins0_map, src_ins0_map_stack, aug_params, "neighbor")
            #tgt_ins1_map, src_ins1_map_stack = self.data_aug(tgt_ins1_map, src_ins1_map_stack, aug_params, "neighbor")

            if self.opt.ins0_edge_explore:
                tgt_ins0_edge, src_ins0_edge_stack = \
                    self.data_aug(tgt_ins0_edge, src_ins0_edge_stack, aug_params, "bilinear")
                #TODO maybe transfer needs this setting: self.data_aug(tgt_ins0_edge, src_ins0_edge_stack, aug_params, "neighbor")

            if self.opt.ins1_edge_explore:
                tgt_ins1_edge, src_ins1_edge_stack = \
                    self.data_aug(tgt_ins1_edge, src_ins1_edge_stack, aug_params, "bilinear")
                #TODO maybe transfer needs this setting: self.data_aug(tgt_ins1_edge, src_ins1_edge_stack, aug_params, "neighbor")

        # 4. RETURN
        # image_channels=3*opt.seq_length
        tgt_image = image_all[:, :, :, :3]
        src_image_stack = image_all[:, :, :, 3:]  #3:image_channels]
        intrinsics = self.get_multi_scale_intrinsics(intrinsics,
                                                     opt.num_scales)

        # if opt.sem_assist and opt.ins_assist:
        return tgt_image, src_image_stack, intrinsics, \
                [tgt_sem, tgt_sem_map, tgt_sem_mask, tgt_sem_edge], \
                [src_sem_stack, src_sem_map_stack, src_sem_mask_stack, src_sem_edge_stack], \
                [tgt_ins0, tgt_ins0_map, tgt_ins0_edge, tgt_ins1_edge], \
                [src_ins0_stack, src_ins0_map_stack, src_ins0_edge_stack, src_ins1_edge_stack]
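
        # Illustrative usage (editor's sketch, not from the original file): a
        # training script would unpack the nested return value along the lines of
        #   tgt, src_stack, intrinsics, sem_tgt, sem_src, ins_tgt, ins_src = \
        #       loader.load_train_batch()
        # where `loader` and `load_train_batch` are hypothetical names for the
        # data-loader object and this method.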
Beispiel #50
0
 def decode_csv(row):
     # row is a string tensor containing the contents of one row
     features = tf.decode_csv(row, record_defaults=DEFAULTS)  # string tensor -> list of 50 rank 0 float tensors
     label = features.pop()  # remove last feature and use as label
     features = tf.stack(features)  # list of rank 0 tensors -> single rank 1 tensor
     return {TIMESERIES_COL: features}, label
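
# Editor's note (assumption): DEFAULTS and TIMESERIES_COL are defined elsewhere in
# the original script. For a 50-value sequence they would look something like
#   DEFAULTS = [[0.0] for _ in range(50)]
#   TIMESERIES_COL = 'rawdata'   # hypothetical column name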
Beispiel #51
0
def train_model(path,
                in_seq_size,
                out_seq_size,
                units,
                layers,
                trainiterations,
                batch_size,
                dout,
                dictionary,
                restore=False):

    # Placeholder variables to be fed into the model at run time
    x = tf.placeholder(tf.int32, shape=[None, None])
    y = tf.placeholder(tf.int32, shape=[None, None])
    targets = tf.placeholder(tf.int32, shape=[None, None])
    keep = tf.placeholder(tf.float32)

    # Dictionary reversed to lookup words from values
    rvsdictionary = dict(izip(dictionary.values(), dictionary.keys()))
    # Number of words in the dictionary
    dictsize = len(dictionary)

    # Files that the model is trained on
    filename_queue = tf.train.string_input_producer(
        [str(path) + "dialogs.csv"])

    # Reads the files in the filename_queue
    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)

    # Decodes the CSV to read the 1 pair string for the training data
    record_defaults = [[""], [""]]
    col1, col2 = tf.decode_csv(value,
                               record_defaults=record_defaults,
                               field_delim=",")

    # Constructs 2 tensors for the features(Input sentences) and labels(correct outputs)
    features = tf.pack(col1)
    labels = tf.pack(col2)

    # Shuffles the inputs
    features, labels = tf.train.shuffle_batch([features, labels],
                                              batch_size,
                                              capacity=10000,
                                              min_after_dequeue=5000,
                                              num_threads=4)

    # Arrays to hold the inputs and the correct outputs
    teminp = []
    temoutput = []
    temtarget = []

    # Makes the list of encoder inputs for the rnn
    for o in range(in_seq_size):
        teminp.append(x[:, o])

    # Makes the lists of decoder inputs and target outputs for the rnn
    for o in range(out_seq_size):
        temoutput.append(y[:, o])
        temtarget.append(targets[:, o])

    # Makes the temporary weights to train the model
    W1 = tf.placeholder(tf.float32, shape=[batch_size, out_seq_size])
    W1_0 = []
    for j in range(out_seq_size):
        W1_0.append(W1[:, j])

    # Makes the rnn cell (a Gated Recurrent Unit, an alternative recurrent cell)
    cell1 = tf.nn.rnn_cell.GRUCell(units)
    # Adds dropout in the layers to make the model more robust
    drop = tf.nn.rnn_cell.DropoutWrapper(cell1,
                                         input_keep_prob=keep,
                                         output_keep_prob=keep)
    # Makes multiple layers of the model
    cell = tf.nn.rnn_cell.MultiRNNCell([drop] * layers)

    # Number of samples for sampled softmax
    num_samples = 512

    # Makes the output projection layer by creating its variables: the weights (w) and the bias (b)
    w = tf.get_variable("proj_w", [units, dictsize])
    w_t = tf.transpose(w)
    with tf.device("/cpu:0"):
        b = tf.get_variable("proj_b", [dictsize])
    # Output projection to take the rnn outputs and turn them into the word outputs
    output_projection = (w, b)

    # Sampled softmax loss function, which trains against a sample of the vocabulary for efficiency
    def sampled_loss(inputs, labels):
        labels = tf.reshape(labels, [-1, 1])
        return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels, num_samples,
                                          dictsize)
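
    # Note: the (inputs, labels) argument order above matches the pre-1.0 TF API;
    # from TF 1.0 onward the signature is
    # tf.nn.sampled_softmax_loss(weights, biases, labels, inputs, num_sampled, num_classes).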

    # Declares the softmax loss function for training
    softmax_loss_function = sampled_loss

    # Seq2Seq model
    rnn, state = seq2seq_gpu.embedding_attention_seq2seq(
        teminp,
        temoutput,
        cell,
        dictsize,
        dictsize,
        1000,
        output_projection=output_projection,
        feed_previous=False)

    rnnoutputs = [tf.matmul(word, w) + b for word in rnn]

    # Sequence loss used to train the model (despite its name, `logits` holds the scalar loss)
    logits = tf.nn.seq2seq.sequence_loss(
        rnn, temtarget, W1_0, softmax_loss_function=softmax_loss_function)
    tf.scalar_summary("Loss", logits)

    # Optimizer to change the weights in the model
    train = tf.train.AdagradOptimizer(0.1).minimize(logits)

    # Saves the model after training
    saver = tf.train.Saver()

    # GPU config files to control memory usage
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Initializes all the variables and creates all the Tensorflow objects above
    init_op = tf.initialize_all_variables()
    sess = tf.InteractiveSession(config=config)
    sess.run(init_op)

    # Merges all summaries and creates a writer so training can be monitored in TensorBoard
    merged = tf.merge_all_summaries()
    writer = tf.train.SummaryWriter(path + "graph", sess.graph)

    # Starts the queue-runner threads for training
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    # If true will restore the model from a previous version
    if restore:
        print("Restoring Model")
        saver.restore(sess, str(path) + "model.ckpt")

    # Training loop
    for i in range(trainiterations):
        # Records the start time so per-iteration training time can be reported
        cutime = time.time() * 1000

        # Gets the data from the dialogs file
        data, outputs = sess.run([features, labels])

        # Lists of the data to be trained
        databatch = []
        labelsbatch = []
        correctoutputsbatch = []

        # Loop to turn each word in the training sentences into integer arrays
        for line in data:
            # Splits the data by word
            data = re.split("\s", line)
            # Creates a list of the word integers
            tempdata = []
            # Fills the tempdata list with the word integers
            for word in data:
                if dictionary.get(word) is not None:
                    tempdata.append(dictionary[word])
                else:
                    tempdata.append(dictionary["UKN"])
            # Fills the rest of the empty spaces with null values
            for p in range(in_seq_size - len(tempdata)):
                tempdata.append(dictionary["NULL"])
            # Reverses the input integers; reversing the source sequence has been shown to improve seq2seq results
            tempdata.reverse()
            # Adds this sentence to the batch
            databatch.append(tempdata)

        # Loop to turn each word in the training sentences into integer arrays
        for line in outputs:
            # Splits the data by word
            outputs = re.split("\s", line)
            # Creates a list of the word integers
            outputs.insert(0, "GO")
            tempoutputs = []
            # Fills the tempoutputs list with the word integers
            for word in outputs:
                if dictionary.get(word) is not None:
                    tempoutputs.append(dictionary[word])
            # Fills the rest of the empty spaces with null values
            for p in range(out_seq_size - len(tempoutputs)):
                tempoutputs.append(dictionary["NULL"])
            # Makes the correct outputs to train the model on
            correctoutputs = [
                tempoutputs[k + 1] for k in range(len(tempoutputs) - 1)
            ]
            correctoutputs = np.append(np.array(correctoutputs),
                                       dictionary["NULL"])
            # Adds the lists to the batches for training
            labelsbatch.append(tempoutputs)
            correctoutputsbatch.append(correctoutputs)

        # Makes the batches into arrays to be used by Tensorflow
        databatch = np.array(databatch)
        labelsbatch = np.array(labelsbatch)
        correctoutputsbatch = np.array(correctoutputsbatch)

        # Training action to change the weights of the model
        summary, _ = sess.run(
            [merged, train],
            feed_dict={
                x: databatch,
                y: labelsbatch,
                targets: correctoutputsbatch,
                W1: np.ones([batch_size, out_seq_size], dtype=np.float32),
                keep: 0.5
            })
        #print(sess.run(tf.get_default_graph().get_tensor_by_name("embedding_attention_seq2seq/embedding_attention_decoder/embedding:0")))
        # Writes data to a file to be viewed
        writer.add_summary(summary, global_step=i)

        if dout:
            tempout = sess.run(rnnoutputs,
                               feed_dict={
                                   x: databatch,
                                   y: labelsbatch,
                                   keep: 1.0
                               })
            tempdata = np.split(np.array(tempout), batch_size, 1)

            data = []
            for sent in tempdata:
                temdata = []

                for word in sent:
                    temdata.append(rvsdictionary[np.argmax(word)])
                temdata = [item for item in temdata if item != 'NULL']
                data.append(temdata)
            print(data)
        print("Time: " + str((time.time() * 1000) - cutime) + " Iteration: " +
              str(i))

        if i % 10000 == 0 and i != 0:
            saver.save(sess, str(path) + "model.ckpt", global_step=i)
            print("Model Saved")

    saver.save(sess, str(path) + "model.ckpt")

    coord.request_stop()
    coord.join(threads)
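
    # Illustrative call (editor's sketch; the paths, sizes, and word dictionary
    # are assumptions, not taken from the original script):
    # train_model(path="./data/", in_seq_size=25, out_seq_size=25, units=512,
    #             layers=3, trainiterations=100000, batch_size=32, dout=True,
    #             dictionary=word_to_id, restore=False)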
Beispiel #52
0
# data csv files
train_csv_dir = "/mnt/hdd3t/Data/hci1/hoon/LightHouse_of_Inha/CSVs/3th/size/train_G_size.csv"
test_csv_dir = "/mnt/hdd3t/Data/hci1/hoon/LightHouse_of_Inha/CSVs/3th/size/test_G_size.csv"

image_height = 299
image_width = 299
train_batch_size = 32 # batch size
test_batch_size = 16
num_out = 3 # number of output result


# train data load
train_queue = tf.train.string_input_producer([train_csv_dir])
train_reader = tf.TextLineReader()
_, train_csv_value = train_reader.read(train_queue)
train_img_dir, train_label, train_gender = tf.decode_csv(train_csv_value, record_defaults=[[""], [-1], [-1]])
train_img_value = tf.read_file(train_img_dir)
train_img = tf.reshape(tf.cast(tf.image.decode_jpeg(train_img_value, channels=3), dtype=tf.float32), shape=[image_height, image_width, 3])
train_label = tf.reshape(tf.one_hot(train_label, depth=num_out, on_value=1.0, off_value=0.0), shape=[num_out])
train_gender = tf.reshape(train_gender, shape=[1])

# test data load
test_queue = tf.train.string_input_producer([test_csv_dir], shuffle=False)
test_reader = tf.TextLineReader()
_, test_csv_value = test_reader.read(test_queue)
test_img_dir, test_label, test_gender = tf.decode_csv(test_csv_value, record_defaults=[[""], [-1], [-1]])
test_img_value = tf.read_file(test_img_dir)
test_img = tf.reshape(tf.cast(tf.image.decode_jpeg(test_img_value, channels=3), dtype=tf.float32), shape=[image_height, image_width, 3])
test_label = tf.reshape(tf.one_hot(test_label, depth=num_out, on_value=1.0, off_value=0.0), shape=[num_out])
test_gender = tf.reshape(test_gender, shape=[1])
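
# Editor's note: the tensors above describe a single decoded example; the original
# snippet does not show batching, but a typical next step would be:
train_img_batch, train_label_batch, train_gender_batch = tf.train.shuffle_batch(
    [train_img, train_label, train_gender],
    batch_size=train_batch_size,
    capacity=1000 + 3 * train_batch_size,
    min_after_dequeue=1000)
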
def ex4():

    # tf.train.string_input_producer() enqueues the required files into the
    # filename queue (whether they are shuffled is controlled by the shuffle flag)
    filename_queue = tf.train.string_input_producer(
        string_tensor=['test-score.csv'], shuffle=False, name='filename_queue')

    # A reader matching the data format (csv, tfrecord, TextLineReader, etc.)
    # dequeues entries from the filename queue and reads their contents into value.
    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)

    # Default values, in case of empty columns. Also specifies the type of the
    # decoded result.
    # Decode each value as a CSV record.
    # http://bcho.tistory.com/1165?category=555440
    record_defaults = [[0.], [0.], [0.], [0.]]  # (CSV format)
    xy = tf.decode_csv(value, record_defaults=record_defaults)

    # Collect batches of csv rows
    train_x_batch, train_y_batch = tf.train.batch([xy[0:-1], xy[-1:]],
                                                  batch_size=25)

    # placeholders for a tensor that will be always fed.
    X = tf.placeholder(tf.float32, shape=[None, 3])
    Y = tf.placeholder(tf.float32, shape=[None, 1])

    W = tf.Variable(tf.random_normal([3, 1]), name='weight')
    b = tf.Variable(tf.random_normal([1]), name='bias')

    # Hypothesis
    hypothesis = tf.matmul(X, W) + b

    # Simplified cost/loss function
    cost = tf.reduce_mean(tf.square(hypothesis - Y))

    # Minimize
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
    train = optimizer.minimize(cost)

    # Launch the graph in a session.
    sess = tf.Session()
    # Initializes global variables in the graph.
    sess.run(tf.global_variables_initializer())

    # Start populating the filename queue.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    x_batch, y_batch = sess.run([train_x_batch, train_y_batch])
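    # Note: the batch is fetched from the queue only once here, so every step of
    # the loop below trains on the same 25 examples; running this sess.run() inside
    # the loop would stream a fresh batch per step.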

    for step in range(2001):
        cost_val, hy_val, _ = sess.run([cost, hypothesis, train],
                                       feed_dict={
                                           X: x_batch,
                                           Y: y_batch
                                       })
        if step % 10 == 0:
            print(step, "Cost: ", cost_val, "\nPrediction:\n", hy_val)

    coord.request_stop()
    coord.join(threads)

    # Ask my score
    print("Your score will be ",
          sess.run(hypothesis, feed_dict={X: [[100, 70, 101]]}))

    print("Other scores will be ",
          sess.run(hypothesis, feed_dict={X: [[60, 70, 110], [90, 100, 80]]}))
Beispiel #54
0
 def decode_csv(value_column):
     columns = tf.decode_csv(records = value_column, record_defaults = DEFAULTS)
     features = dict(zip(CSV_COLUMNS, columns))          
     label = features.pop(LABEL_COLUMN)         
     return features, label
Beispiel #55
0
def iris_parser(record):
    record_types = [tf.float32, tf.float32, tf.float32, tf.float32, tf.int32, tf.int32, tf.int32]
    line = tf.decode_csv(record, record_types, field_delim=',')
    return line
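
# Editor's note: tf.decode_csv normally expects default *values* in record_defaults
# (e.g. [[0.0]] * 4 + [[0]] * 3) rather than bare dtype objects, and the parser is
# typically applied through the tf.data API, e.g. (illustrative, assuming an
# 'iris.csv' file with one header row):
#   dataset = tf.data.TextLineDataset('iris.csv').skip(1).map(iris_parser).batch(32)
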
preprocessing.resize()  # Resize image to 64 by 64 and change it to grayscale
preprocessing.make_csv()  # Create labeled image csv

# hyper parameter
IMAGE_WIDTH = 64
IMAGE_HEIGHT = 64
BATCH_SIZE = 125
NUM_CLASSES = 2
CHECK_POINT_DIR = TB_SUMMARY_DIR = "./tensor_board/"

# read csv
csv_file = tf.train.string_input_producer(["./label.csv"], shuffle=True)
csv_reader = tf.TextLineReader()
_, line = csv_reader.read(csv_file)

image_file, label_decoded = tf.decode_csv(line, record_defaults=[[""], [""]])
image_decoded = tf.image.decode_jpeg(tf.read_file(image_file), channels=1)
image_cast = tf.cast(image_decoded, tf.float32)
image = tf.reshape(image_cast,
                   [IMAGE_WIDTH, IMAGE_HEIGHT, 1])  # 64 by 64 , grayscale

test_batch = int(12500 / BATCH_SIZE)
test_image_list = [
    './resize_test/' + file_name for file_name in os.listdir('./resize_test/')
]
test_image_reader = tf.WholeFileReader()
test_image_name = tf.train.string_input_producer(test_image_list)
_, value = test_image_reader.read(test_image_name)
test_image_decode = tf.cast(tf.image.decode_jpeg(value, channels=1),
                            tf.float32)
test_image = tf.reshape(test_image_decode, [IMAGE_WIDTH, IMAGE_HEIGHT, 1])
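
# Editor's note: as with the labelled images above, the single test example would
# typically be batched before evaluation (not part of the original snippet), e.g.:
test_image_batch = tf.train.batch([test_image], batch_size=BATCH_SIZE)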
Beispiel #57
0
 def parse_batch(value_column):
     columns = tf.decode_csv(value_column, record_defaults = DEFAULTS)
     features = dict(zip(CSV_COLUMNS, columns))
     label = features.pop(LABEL_COLUMN)
     return add_engineered(features), label
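
 # Editor's note (assumption): CSV_COLUMNS, DEFAULTS, LABEL_COLUMN, and
 # add_engineered() are defined elsewhere in the original script; add_engineered()
 # typically derives additional features from the raw columns before they reach
 # the model.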
import tensorflow as tf

tf.set_random_seed(777)

filename_queue = tf.train.string_input_producer(['data-01-test-score.csv'],
                                                shuffle=False,
                                                name='filename_queue')

reader = tf.TextLineReader()
key, value = reader.read(filename_queue)

record_defaults = [[0.], [0.], [0.], [0.]]
xy = tf.decode_csv(value, record_defaults=record_defaults)

train_x_batch, train_y_batch = tf.train.batch([xy[0:-1], xy[-1:]],
                                              batch_size=10)

X = tf.placeholder(tf.float32, shape=[None, 3])
Y = tf.placeholder(tf.float32, shape=[None, 1])

W = tf.Variable([[0.], [0.], [0.]], name='weight')
b = tf.Variable([0.], name='bias')

hypothesis = tf.matmul(X, W) + b
cost = tf.reduce_mean(tf.square(hypothesis - Y))

optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

sess = tf.Session()
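
# Editor's note: the snippet stops here; the usual continuation (illustrative,
# mirroring the other examples in this collection) would be:
#   sess.run(tf.global_variables_initializer())
#   coord = tf.train.Coordinator()
#   threads = tf.train.start_queue_runners(sess=sess, coord=coord)
#   ...training loop fed from train_x_batch / train_y_batch...
#   coord.request_stop(); coord.join(threads)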
Beispiel #59
0
'''
    import data
'''
directory = "./*.csv"
filename = "./result/estr3108/resultbh1n1.csv"
csvfile = open(filename, 'wb')
writer = csv.writer(csvfile)
filename_queue = tf.train.string_input_producer(
    tf.train.match_filenames_once(directory), shuffle=True)
line_reader = tf.TextLineReader(skip_header_lines=1)
_, csv_row = line_reader.read(filename_queue)

record_defaults = [[0], [""]]
min_queue_examples = 5000
capacity = min_queue_examples + 5 * 100
labelnn, featuress = tf.decode_csv(csv_row, record_defaults=record_defaults)
labeln, features = tf.train.shuffle_batch([labelnn, featuress],
                                          batch_size=100,
                                          min_after_dequeue=min_queue_examples,
                                          capacity=capacity)
'''
    construct the model
'''
# layer 1
y_ = tf.placeholder(tf.float32, shape=[None, types])
x = tf.placeholder(tf.float32, shape=[None, 4, length])
xd = tf.reshape(x, [-1, 4, length, 1])
W_conv1 = weight_variable(sh_w1)
b_conv1 = bias_variable([out1])
h_conv1 = tf.nn.relu(conv2d(xd, W_conv1) + b_conv1)
h_pool1 = maxpool(h_conv1)
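
# Editor's note (assumption): weight_variable, bias_variable, conv2d, and maxpool,
# as well as the hyperparameters types, length, sh_w1, and out1, are defined
# elsewhere in the original script. Standard helper definitions in the style of
# the TF MNIST tutorial would be the following (they would need to appear before
# their first use above):
def weight_variable(shape):
    # Small random initial weights
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    # Small constant initial bias
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
    # 2-D convolution with stride 1 and SAME padding
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def maxpool(x):
    # 2x2 max pooling
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
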
import tensorflow

filename_queue = tensorflow.train.string_input_producer(
    ['data-01-test-score.csv'], shuffle=False, name='filename-queue')

reader = tensorflow.TextLineReader()
key, value = reader.read(filename_queue)

record_defaults = [[0.], [0.], [0.], [0.]]
xy = tensorflow.decode_csv(value, record_defaults=record_defaults)

train_x_batch, train_y_batch = tensorflow.train.batch([xy[0:-1], xy[-1:]],
                                                      batch_size=10)

X = tensorflow.placeholder(tensorflow.float32, shape=[None, 3])
Y = tensorflow.placeholder(tensorflow.float32, shape=[None, 1])

W = tensorflow.Variable(tensorflow.random_normal([3, 1]), name='weight')
b = tensorflow.Variable(tensorflow.random_normal([1]), name='bias')

hypothesis = tensorflow.matmul(X, W) + b

cost = tensorflow.reduce_mean(tensorflow.square(hypothesis - Y))

optimizer = tensorflow.train.GradientDescentOptimizer(learning_rate=1e-5)
train = optimizer.minimize(cost)

session = tensorflow.Session()
session.run(tensorflow.global_variables_initializer())

coordinator = tensorflow.train.Coordinator()