Example #1
def convert_to_proto(data):
    # data format example: ('0x87', ['hello', 'world'], [1.2, 0.8])
    feature = {
        'id': _bytes_feature(data[0].encode('utf-8')),
        'mutate': _bytes_feature(tf.serialize_tensor(data[1]).numpy()),
        'label': _bytes_feature(tf.serialize_tensor(data[2]).numpy())
    }
    exp = tf.train.Example(features=tf.train.Features(feature=feature))
    return exp
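The _bytes_feature helper these snippets rely on (defined inline in Example #3) and the _int64_feature used in Example #7 are the usual TFRecord one-liners; a minimal sketch:

import tensorflow as tf

def _bytes_feature(value):
    """Returns a bytes_list feature from a string / byte."""
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _int64_feature(value):
    """Returns an int64_list feature from a bool / enum / int / uint."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))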
Example #2
def serialize_example(partial_voxel, full_voxel):
    '''
    Serialize given partial, full voxel pair.
    '''

    feature = {
        'partial': _bytes_feature(tf.serialize_tensor(partial_voxel).numpy()),
        'full': _bytes_feature(tf.serialize_tensor(full_voxel).numpy()),
    }

    # Serialize w/ tf.train.Example.
    example_proto = tf.train.Example(features=tf.train.Features(
        feature=feature))
    return example_proto.SerializeToString()
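The inverse step is not shown on this page; a sketch of a matching parse function, assuming float32 voxel tensors:

def parse_example(serialized):
    feature_description = {
        'partial': tf.FixedLenFeature([], tf.string),
        'full': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(serialized, feature_description)
    partial = tf.parse_tensor(parsed['partial'], out_type=tf.float32)
    full = tf.parse_tensor(parsed['full'], out_type=tf.float32)
    return partial, full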
Example #3
def write_tfrecord(fname, dataset, log_every=100, pre_fn=None):
    """Helper function to convert dataset object into tfrecord file.

    fname must end with .yml or .yaml.
    The data will be written in a .tfr file with the same suffix.

    Args:
        dataset (Dataset): input dataset.
        fname (str): filename of the dataset to be saved.
    """
    def _bytes_feature(value):
        """Returns a bytes_list from a string / byte."""
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    # Preparation
    tfr = '.'.join(fname.split('.')[:-1] + ['tfr'])
    writer = tf.python_io.TFRecordWriter(tfr)
    tensors = dataset.make_one_shot_iterator().get_next()
    if pre_fn:
        tensors = pre_fn(tensors)
        # Map pre_fn over the dataset too, so that output_types/output_shapes
        # below describe the preprocessed elements.
        dataset = dataset.map(pre_fn)
    types = dataset.output_types
    shapes = dataset.output_shapes
    # Sanity check
    assert (type(types) == dict and all(type(v) != dict for v in types.values())),\
        "Only dataset of non-nested dictionary is supported."
    assert fname.endswith(('.yml', '.yaml')), \
        "Filename must end with .yml or .yaml."
    serialized = {k: tf.serialize_tensor(v) for k, v in tensors.items()}
    sess = tf.Session()
    # Writing Loop
    n_parsed = 0
    try:
        while True:
            example = tf.train.Example(features=tf.train.Features(
                feature={
                    key: _bytes_feature(val)
                    for key, val in sess.run(serialized).items()
                }))
            writer.write(example.SerializeToString())
            n_parsed += 1
            if n_parsed % log_every == 0:
                sys.stdout.write('\r {} samples written to {} ...'.format(
                    n_parsed, tfr))
                sys.stdout.flush()
    except tf.errors.OutOfRangeError:
        print('\r {} samples written to {}, done.'.format(n_parsed, tfr))
        sess.close()
        writer.close()
    # Write metadata
    format_dict = {
        k: {
            'dtype': types[k].name,
            'shape': shapes[k].as_list()
        }
        for k in types.keys()
    }
    info_dict = {'n_sample': n_parsed}
    with FileIO(fname, 'w') as f:
        yaml.safe_dump({'format': format_dict, 'info': info_dict}, f)
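A matching reader is not shown on this page; a minimal sketch under the same imports (FileIO, yaml), assuming fully-defined shapes in the metadata:

def read_tfrecord(fname):
    """Sketch of the inverse: rebuild a dataset from the .yml metadata
    and .tfr data written by write_tfrecord (names are illustrative)."""
    with FileIO(fname, 'r') as f:
        meta = yaml.safe_load(f)
    tfr = '.'.join(fname.split('.')[:-1] + ['tfr'])
    fmt = meta['format']
    feature_description = {k: tf.FixedLenFeature([], tf.string) for k in fmt}

    def _parse(pto):
        parsed = tf.parse_single_example(pto, feature_description)
        return {k: tf.reshape(
                    tf.parse_tensor(parsed[k],
                                    out_type=tf.as_dtype(fmt[k]['dtype'])),
                    fmt[k]['shape'])
                for k in fmt}

    return tf.data.TFRecordDataset(tfr).map(_parse)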
Example #4
def tf_io_encode_raw(x):
  # Return the raw bytes of a tensor (the inverse of tf.decode_raw).
  # tf.serialize_tensor yields a serialized TensorProto whose tail is the
  # tensor's raw content, so the proto header can be stripped by offset.
  x = tf.convert_to_tensor(x)
  unit_size = x.dtype.size
  total_size = tf.size(x, out_type=tf.int64) * unit_size
  serialized = tf.serialize_tensor(x)
  serialized_size = tf.size(tf.strings.bytes_split(serialized), out_type=tf.int64)
  offset = serialized_size - total_size
  return tf.strings.substr(serialized, offset, total_size)
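Since the stripped bytes are the tensor's raw content in host byte order, they can be checked against tf.decode_raw; a quick eager-mode sanity check:

x = tf.constant([1.5, 2.5, 3.5], dtype=tf.float32)
raw = tf_io_encode_raw(x)
print(tf.decode_raw(raw, tf.float32).numpy())  # -> [1.5 2.5 3.5]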
Example #5
def serialize_example(point_cloud_views, xyz, label):
    '''
    Serialize given SDF data.
    '''

    feature = {
        'point_clouds':
        _bytes_feature(tf.serialize_tensor(point_cloud_views).numpy()),
        'xyzs':
        _bytes_feature(tf.serialize_tensor(xyz).numpy()),
        'labels':
        _bytes_feature(tf.serialize_tensor(label).numpy()),
    }

    # Serialize w/ tf.train.Example.
    example_proto = tf.train.Example(features=tf.train.Features(
        feature=feature))
    return example_proto.SerializeToString()
Example #6
def prepare_data(date,
                 purpose='training',
                 source='20CR2c',
                 variable='prmsl',
                 member=1,
                 normalise=None,
                 opfile=None):
    """Make tf.load-able files, suitably normalised for training ML models 

    Data will be stored in directory $SCRATCH/Machine-Learning-experiments.

    Args:
        date (:obj:`datetime.datetime`): datetime to get data for.
        purpose (:obj:`str`): 'training' (default) or 'test'.
        source (:obj:`str`): Where to get the data from - at the moment, only '20CR2c' is supported.
        variable (:obj:`str`): Variable to use (e.g. 'prmsl').
        member (:obj:`int`): Ensemble member to use (default 1).
        normalise (:obj:`func`): Function to normalise the data (to mean=0, sd=1). Must take an :obj:`iris.cube.Cube` as argument and return a normalised cube as result. If None (default), use a standard normalisation function (see :func:`normalise`).
        opfile (:obj:`str`): Output filename; if None (default), a standard name is constructed under $SCRATCH/Machine-Learning-experiments.

    Returns:
        Nothing, but creates, as side effect, a tf.load-able file with the normalised data for the given source, variable, and date.

    Raises:
        ValueError: Unsupported source, or can't load the original data, or normalisation failed.
    """
    if opfile is None:
        opfile = ("%s/Machine-Learning-experiments/datasets/%s/%s/%s" %
                  (os.getenv('SCRATCH'), source, variable, purpose))
    if not os.path.isdir(os.path.dirname(opfile)):
        os.makedirs(os.path.dirname(opfile))

    ic = twcr.load(variable,
                   datetime.datetime(date.year, date.month, date.day,
                                     date.hour),
                   version='2c')

    # Reduce to selected ensemble member
    ic = ic.extract(iris.Constraint(member=member))

    # Normalise (to mean=0, sd=1)
    if normalise is None:
        normalise = get_normalise_function(source, variable)
    ic.data = normalise(ic.data)

    # Convert to Tensor
    ict = tf.convert_to_tensor(ic.data, numpy.float32)

    # Write to tfrecord file
    sict = tf.serialize_tensor(ict)
    tf.io.write_file(opfile, sict)
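Reading such a file back is the mirror image; a minimal sketch (the out_type must match the numpy.float32 written above):

def load_prepared_data(opfile):
    # Hypothetical loader for the file written by prepare_data.
    sict = tf.io.read_file(opfile)
    return tf.parse_tensor(sict, out_type=tf.float32)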
Example #7
def serialize_example(image, label):
    """Create a tf.Example message ready to be written to a file.
    """

    image_str = tf.serialize_tensor(image)

    feature = {
        "image": _bytes_feature(image_str.numpy()),
        "label": _int64_feature(label)
    }

    example_proto = tf.train.Example(features=tf.train.Features(
        feature=feature))
    return example_proto.SerializeToString()
Example #8
def mywrite():
    # some data
    data = ('0x87', ['hello', 'world'], [1.2, 0.8])

    # Round-trip check on the individual fields.
    tf.parse_tensor(tf.serialize_tensor(data[1]).numpy(), out_type=tf.string)
    tf.parse_tensor(tf.serialize_tensor(data[2]).numpy(), out_type=tf.float32)

    # Build an Example proto (convert_to_proto from Example #1),
    # serialize it to a string, and restore it again.
    exp = convert_to_proto(data)
    s = exp.SerializeToString()
    exp = tf.train.Example.FromString(s)

    feature_description = {
        'id': tf.FixedLenFeature([], tf.string, default_value=''),
        'mutate': tf.FixedLenFeature([], tf.string, default_value=''),
        'label': tf.FixedLenFeature([], tf.string, default_value=''),
        # 'feature3': tf.FixedLenFeature([], tf.float32, default_value=0.0),
    }
    tf.parse_single_example(s, feature_description)

    with tf.python_io.TFRecordWriter('111.tfrec') as writer:
        writer.write(exp.SerializeToString())

    def _my_parse_function(pto):
        # Parse the input tf.Example proto using the dictionary above.
        return tf.parse_single_example(pto, feature_description)

    raw = tf.data.TFRecordDataset('111.tfrec')
    parsed_image_dataset = raw.map(_my_parse_function)

    # Iterating the dataset directly requires eager execution.
    for a in parsed_image_dataset:
        print(a)
        print(a['label'])
        # 'label' was stored as a serialized tensor, so parse it back.
        print(tf.parse_tensor(a['label'], out_type=tf.float32))
Example #9
def ds2tfrecord(ds, filepath):
    with tf.python_io.TFRecordWriter(filepath) as writer:
        feat_dict = ds.make_one_shot_iterator().get_next()
        serialized_dict = {name: tf.serialize_tensor(fea) for name, fea in feat_dict.items()}
        with tf.Session() as sess:
            try:
                while True:
                    features = {}
                    for name, serialized_tensor in serialized_dict.items():
                        bytes_string = sess.run(serialized_tensor)
                        bytes_list = tf.train.BytesList(value=[bytes_string])
                        features[name] = tf.train.Feature(bytes_list=bytes_list)
                    # Create a Features message using tf.train.Example.
                    example_proto = tf.train.Example(features=tf.train.Features(feature=features))
                    example_string = example_proto.SerializeToString()
                    # Write to TFRecord
                    writer.write(example_string)
            except tf.errors.OutOfRangeError:
                pass
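Usage is then a single call; a sketch with a small hypothetical in-memory dataset:

ds = tf.data.Dataset.from_tensor_slices(
    {'x': [[1.0, 2.0], [3.0, 4.0]], 'y': [0.5, 1.5]})
ds2tfrecord(ds, 'example.tfrecord')  # writes two serialized examples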
Example #10
if args.source == '20CR2c':
    ic = twcr.load(
        args.variable,
        datetime.datetime(args.year, args.month, args.day, args.hour) +
        datetime.timedelta(hours=24),
        version='2c')
    ic = ic.extract(iris.Constraint(member=args.member))
    ic = rr_cube(ic)
    # Normalise to mean=0, sd=1 (approx)
    if args.variable == 'prmsl':
        ic.data -= 101325
        ic.data /= 3000
    elif args.variable == 'air.2m':
        ic.data -= 280
        ic.data /= 50
    elif args.variable == 'z500':
        ic.data -= 5300
        ic.data /= 600
    elif args.variable == 'prate':
        ic.data = ic.data * 1000 + 1.001
        ic.data = numpy.log(ic.data)

else:
    raise ValueError('Source %s is not supported' % args.source)

# Convert to Tensor
ict = tf.convert_to_tensor(ic.data, numpy.float32)

# Write to tfrecord file
sict = tf.serialize_tensor(ict)
tf.io.write_file(args.opfile, sict)
Example #11
t2m = twcr.load('air.2m',
                datetime.datetime(args.year, args.month, args.day, args.hour),
                version='2c')
t2m = tensor_cube(t2m.extract(iris.Constraint(member=1)))
prmsl = twcr.load('prmsl',
                  datetime.datetime(args.year, args.month, args.day, args.hour),
                  version='2c')
prmsl = tensor_cube(prmsl.extract(iris.Constraint(member=1)))
u10m = twcr.load('uwnd.10m',
                 datetime.datetime(args.year, args.month, args.day, args.hour),
                 version='2c')
u10m = tensor_cube(u10m.extract(iris.Constraint(member=1)))
v10m = twcr.load('vwnd.10m',
                 datetime.datetime(args.year, args.month, args.day, args.hour),
                 version='2c')
v10m = tensor_cube(v10m.extract(iris.Constraint(member=1)))

# Convert the validation data into tensor format
t2m_t = tf.convert_to_tensor(normalise_t2m(t2m).data, numpy.float32)
t2m_t = tf.reshape(t2m_t, [79, 159, 1])
prmsl_t = tf.convert_to_tensor(normalise_prmsl(prmsl).data, numpy.float32)
prmsl_t = tf.reshape(prmsl_t, [79, 159, 1])
u10m_t = tf.convert_to_tensor(normalise_wind(u10m).data, numpy.float32)
u10m_t = tf.reshape(u10m_t, [79, 159, 1])
v10m_t = tf.convert_to_tensor(normalise_wind(v10m).data, numpy.float32)
v10m_t = tf.reshape(v10m_t, [79, 159, 1])

# Get encoded versions of the validation data
model_save_file = ("%s/Machine-Learning-experiments/" +
                   "convolutional_autoencoder_perturbations/" +
                   "multivariate_uk_centred_var/saved_models/" +
                   "Epoch_%04d/encoder") % (os.getenv('SCRATCH'), args.epoch)
encoder = tf.keras.models.load_model(model_save_file, compile=False)
ict = tf.concat([t2m_t, prmsl_t, u10m_t, v10m_t], 2)  # Now [79,159,4]
ict = tf.reshape(ict, [1, 79, 159, 4])
result = encoder.predict_on_batch(ict)
result = tf.convert_to_tensor(result, numpy.float32)
sict = tf.serialize_tensor(result)
tf.io.write_file(args.opfile, sict)
Example #12
def serialize(image, label, one_hot, path):
    image_string = tf.serialize_tensor(image)
    one_hot_string = tf.serialize_tensor(one_hot)

    return image_string, label, one_hot_string, path
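This variant is meant to be mapped over a dataset; a sketch with placeholder data (names are illustrative):

# Hypothetical dataset of (image, label, one_hot, path) tuples.
ds = tf.data.Dataset.from_tensor_slices((
    tf.zeros([2, 4, 4, 3]),           # images
    tf.constant([0, 1]),              # integer labels
    tf.one_hot([0, 1], depth=2),      # one-hot labels
    tf.constant(['a.png', 'b.png']),  # file paths
))
ds = ds.map(serialize)  # elements now carry serialized image/one_hot tensors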
Example #13
def serialize_sparse_tensor(self, sparse_tensor):
    return tf.serialize_tensor(tf.serialize_sparse(sparse_tensor)).numpy()
Example #14
def serialize_tensor(self, tensor):
    return tf.serialize_tensor(tensor).numpy()
Example #15
tf.dtypes

tf.get_collection()
tf.get_collection_ref()
tf.get_default_session()
tf.get_local_variable
tf.get_seed()
tf.get_session_handle()
tf.get_session_tensor()
tf.get_default_graph()
tf.get_summary_op()
tf.get_variable()
tf.get_variable_scope()
tf.set_random_seed()
tf.serialize_tensor()
tf.save_v2()
tf.scalar_mul()
tf.scan()
tf.scatter_add()
tf.scatter_div()
tf.scatter_mul()
tf.scatter_nd()
tf.scatter_nd_add()
tf.scatter_nd_non_aliasing_add()
tf.scatter_nd_sub()
tf.scatter_nd_update()
tf.scatter

tf.tables_initializer()
tf.tensordot()
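Of this list, tf.serialize_tensor is the function exercised throughout this page; a minimal round trip (eager mode assumed) looks like:

t = tf.constant([[1, 2], [3, 4]])
s = tf.serialize_tensor(t)                    # scalar tf.string tensor
t2 = tf.parse_tensor(s, out_type=tf.int32)    # recovers the original tensor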