def tensorflow_hello_world(dataset_url='file:///tmp/carbon_pycarbon_dataset/'):
    # Example: tf_tensors will return tensors with dataset data
    with make_carbon_reader(dataset_url) as reader:
        tensor = tf_tensors(reader)
        with tf.Session() as sess:
            sample = sess.run(tensor)
            print(sample.id)

    with make_reader(dataset_url, is_batch=False) as reader:
        tensor = make_tensor(reader)
        with tf.Session() as sess:
            sample = sess.run(tensor)
            print(sample.id)

    # Example: use tf.data.Dataset API
    with make_carbon_reader(dataset_url) as reader:
        dataset = make_pycarbon_dataset(reader)
        iterator = dataset.make_one_shot_iterator()
        tensor = iterator.get_next()
        with tf.Session() as sess:
            sample = sess.run(tensor)
            print(sample.id)

    with make_reader(dataset_url, is_batch=False) as reader:
        dataset = make_dataset(reader)
        iterator = dataset.make_one_shot_iterator()
        tensor = iterator.get_next()
        with tf.Session() as sess:
            sample = sess.run(tensor)
            print(sample.id)
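A minimal driver for the walk-through above. It assumes the pycarbon hello-world dataset has already been generated at the default file:// URL, and that tensorflow (as tf) plus the pycarbon reader helpers used here (make_carbon_reader, make_reader, tf_tensors, make_tensor, make_pycarbon_dataset, make_dataset) are imported; the exact module paths depend on the installed pycarbon version.

if __name__ == '__main__':
    # Runs the non-batched hello-world walk-through end to end.
    tensorflow_hello_world()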
def tensorflow_hello_world(dataset_url='file:///tmp/carbon_external_dataset'):
    # Example: tf_tensors will return tensors with dataset data
    with make_batch_carbon_reader(dataset_url) as reader:
        tensor = tf_tensors(reader)
        with tf.Session() as sess:
            # Because we are using make_batch_carbon_reader(), each read returns a batch of rows instead of a single row
            batched_sample = sess.run(tensor)
            print("id batch: {0}".format(batched_sample.id))

    with make_reader(dataset_url) as reader:
        tensor = make_tensor(reader)
        with tf.Session() as sess:
            # Because make_reader() defaults to batch mode (is_batch=True), each read returns a batch of rows instead of a single row
            batched_sample = sess.run(tensor)
            print("id batch: {0}".format(batched_sample.id))

    # Example: use tf.data.Dataset API
    with make_batch_carbon_reader(dataset_url) as reader:
        dataset = make_pycarbon_dataset(reader)
        iterator = dataset.make_one_shot_iterator()
        tensor = iterator.get_next()
        with tf.Session() as sess:
            batched_sample = sess.run(tensor)
            print("id batch: {0}".format(batched_sample.id))

    with make_reader(dataset_url) as reader:
        dataset = make_dataset(reader)
        iterator = dataset.make_one_shot_iterator()
        tensor = iterator.get_next()
        with tf.Session() as sess:
            batched_sample = sess.run(tensor)
            print("id batch: {0}".format(batched_sample.id))
Example #3
def _read_from_tf_tensors(synthetic_dataset,
                          count,
                          shuffling_queue_capacity,
                          min_after_dequeue,
                          ngram,
                          workers_count=10):
    """Used by several test cases. Reads a 'count' rows using reader.

  The reader is configured without row-group shuffling and guarantees deterministic order of rows up to the
  results queue TF shuffling which is controlled by 'shuffling_queue_capacity', 'min_after_dequeue' arguments.

  The function returns a tuple with: (actual data read from the dataset, a TF tensor returned by the reader)
  """

    schema_fields = (NON_NULLABLE_FIELDS if ngram is None else ngram)

    with make_carbon_reader(schema_fields=schema_fields,
                            dataset_url=synthetic_dataset.url,
                            reader_pool_type='thread',
                            shuffle_blocklets=False,
                            workers_count=workers_count) as reader:
        row_tensors = tf_tensors(
            reader,
            shuffling_queue_capacity=shuffling_queue_capacity,
            min_after_dequeue=min_after_dequeue)

        with _tf_session() as sess:
            rows_data = [sess.run(row_tensors) for _ in range(count)]

    return rows_data, row_tensors
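The _tf_session() helper used above is not part of this listing. A plausible reconstruction, modeled on the queue-runner handling in the MNIST example further down, is a context manager that starts the queue runners needed to evaluate tf_tensors() outputs and stops them on exit:

from contextlib import contextmanager

@contextmanager
def _tf_session():
    # Hypothetical sketch of the helper: initialize variables, start queue
    # runners, yield the session, then shut the runners down cleanly.
    with tf.Session() as sess:
        sess.run([tf.local_variables_initializer(),
                  tf.global_variables_initializer()])
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            yield sess
        finally:
            coord.request_stop()
            coord.join(threads)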
Example #4
def test_simple_read_tensorflow(carbon_synthetic_dataset):
    """Read couple of rows. Make sure all tensors have static shape sizes assigned and the data matches reference
  data"""
    with make_carbon_reader(
            schema_fields=NON_NULLABLE_FIELDS,
            dataset_url=carbon_synthetic_dataset.url) as reader:
        row_tensors = tf_tensors(reader)
        with _tf_session() as sess:
            rows_data = [sess.run(row_tensors) for _ in range(30)]

    # Make sure we have static shape info for all fields
    _assert_all_tensors_have_shape(row_tensors)
    _assert_expected_rows_data(carbon_synthetic_dataset.data, rows_data)
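_assert_all_tensors_have_shape() is another helper defined elsewhere in the test module. A minimal sketch of what it is expected to verify:

def _assert_all_tensors_have_shape(row_tensors):
    # Hypothetical sketch: every field of the named tuple returned by
    # tf_tensors() should have static shape information assigned
    # (its dims are known rather than None).
    for tensor in row_tensors:
        assert tensor.get_shape().dims is not None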
Example #5
def test_simple_read_tensorflow_with_non_unischema_many_columns_dataset(
        carbon_many_columns_non_unischema_dataset):
    """Read couple of rows. Make sure all tensors have static shape sizes assigned and the data matches reference
  data"""
    with make_batch_carbon_reader(
            dataset_url=carbon_many_columns_non_unischema_dataset.url
    ) as reader:
        row_tensors = tf_tensors(reader)
        # Make sure we have static shape info for all fields
        for column in row_tensors:
            assert column.get_shape().as_list() == [None]

        with _tf_session() as sess:
            batch = sess.run(row_tensors)._asdict()
            assert set(batch.keys()) == set(
                carbon_many_columns_non_unischema_dataset.data[0].keys())
Example #6
def test_simple_read_tensorflow_with_carbon_dataset(carbon_scalar_dataset):
    """Read couple of rows. Make sure all tensors have static shape sizes assigned and the data matches reference
  data"""
    with make_batch_carbon_reader(
            dataset_url=carbon_scalar_dataset.url) as reader:
        row_tensors = tf_tensors(reader)
        # Make sure we have static shape info for all fields
        for column in row_tensors:
            assert column.get_shape().as_list() == [None]

        with _tf_session() as sess:
            for _ in range(2):
                batch = sess.run(row_tensors)._asdict()
                for i, id_value in enumerate(batch['id']):
                    expected_row = next(d for d in carbon_scalar_dataset.data
                                        if d['id'] == id_value)
                    for field_name in expected_row.keys():
                        _assert_fields_eq(batch[field_name][i],
                                          expected_row[field_name])
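_assert_fields_eq() is likewise defined elsewhere in the test module. A plausible sketch that compares scalar and array-valued fields alike:

import numpy as np

def _assert_fields_eq(actual, expected):
    # Hypothetical sketch: numpy's assert_equal handles scalars, strings
    # and ndarrays with a single call.
    np.testing.assert_equal(actual, expected)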
Example #7
def train_and_test(dataset_url, training_iterations, batch_size,
                   evaluation_interval, start):
    """
  Train a model for training iterations with a batch size batch_size, printing accuracy every log_interval.
  :param dataset_url: The MNIST dataset url.
  :param training_iterations: The training iterations to train for.
  :param batch_size: The batch size for training.
  :param evaluation_interval: The interval used to print the accuracy.
  :return:
  """
    with make_carbon_reader(os.path.join(dataset_url, 'train'),
                            num_epochs=None) as train_reader:
        with make_carbon_reader(os.path.join(dataset_url, 'test'),
                                num_epochs=None) as test_reader:
            train_readout = tf_tensors(train_reader)
            train_image = tf.cast(tf.reshape(train_readout.image, [784]),
                                  tf.float32)
            train_label = train_readout.digit
            batch_image, batch_label = tf.train.batch(
                [train_image, train_label], batch_size=batch_size)

            W = tf.Variable(tf.zeros([784, 10]))
            b = tf.Variable(tf.zeros([10]))
            y = tf.matmul(batch_image, W) + b

            # The raw formulation of cross-entropy,
            #
            #   tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.nn.softmax(y)),
            #                                 reduction_indices=[1]))
            #
            # can be numerically unstable.
            #
            # So here we use tf.losses.sparse_softmax_cross_entropy on the raw
            # outputs of 'y', and then average across the batch.
            cross_entropy = tf.losses.sparse_softmax_cross_entropy(
                labels=batch_label, logits=y)
            train_step = tf.train.GradientDescentOptimizer(0.5).minimize(
                cross_entropy)

            correct_prediction = tf.equal(tf.argmax(y, 1), batch_label)
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

            test_readout = tf_tensors(test_reader)
            test_image = tf.cast(tf.reshape(test_readout.image, [784]),
                                 tf.float32)
            test_label = test_readout.digit
            test_batch_image, test_batch_label = tf.train.batch(
                [test_image, test_label], batch_size=batch_size)

            end = time.time()
            print("before train time: " + str(end - start))

            # Train
            print(
                'Training model for {0} training iterations with batch size {1} and evaluation interval {2}'
                .format(training_iterations, batch_size, evaluation_interval))
            with tf.Session() as sess:
                sess.run([
                    tf.local_variables_initializer(),
                    tf.global_variables_initializer(),
                ])
                coord = tf.train.Coordinator()
                threads = tf.train.start_queue_runners(sess=sess, coord=coord)
                try:
                    for i in range(training_iterations):
                        if coord.should_stop():
                            break

                        sess.run(train_step)

                        if (i % evaluation_interval) == 0 or i == (
                                training_iterations - 1):
                            feed_batch_image, feed_batch_label = sess.run(
                                [test_batch_image, test_batch_label])
                            print(
                                'After {0} training iterations, the accuracy of the model is: {1:.2f}'
                                .format(
                                    i,
                                    sess.run(accuracy,
                                             feed_dict={
                                                 batch_image: feed_batch_image,
                                                 batch_label: feed_batch_label
                                             })))
                finally:
                    coord.request_stop()
                    coord.join(threads)
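A hypothetical driver for train_and_test(); the dataset URL and hyperparameters below are illustrative only, and os/time are assumed to be imported as the function body already requires:

if __name__ == '__main__':
    train_and_test(dataset_url='file:///tmp/mnist_carbon',  # hypothetical location with 'train' and 'test' subsets
                   training_iterations=100,
                   batch_size=100,
                   evaluation_interval=10,
                   start=time.time())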
Example #8
def test_shuffling_queue_with_make_batch_carbon_reader(carbon_scalar_dataset):
    with make_batch_carbon_reader(
            dataset_url=carbon_scalar_dataset.url) as reader:
        # Shuffling-queue arguments are not supported together with a batch reader,
        # so tf_tensors() is expected to raise ValueError.
        with pytest.raises(ValueError):
            tf_tensors(reader, shuffling_queue_capacity=100, min_after_dequeue=90)
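Since the result-queue shuffling arguments are rejected for the batch reader, shuffling with make_batch_carbon_reader would typically happen at the tf.data level instead. A sketch under the same import assumptions as the earlier examples (note that this shuffles whole batches, not individual rows):

def read_shuffled_batches(dataset_url, buffer_size=100):
    with make_batch_carbon_reader(dataset_url) as reader:
        # Shuffle at the tf.data level rather than through the tf_tensors()
        # shuffling queue, which batch readers do not support.
        dataset = make_pycarbon_dataset(reader).shuffle(buffer_size)
        iterator = dataset.make_one_shot_iterator()
        tensor = iterator.get_next()
        with tf.Session() as sess:
            return sess.run(tensor)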