def just_read_batch_obs(dataset_url, key, secret, endpoint):
  with make_batch_carbon_reader(dataset_url, key=key, secret=secret, endpoint=endpoint, num_epochs=1,
                                workers_count=16) as train_reader:
    i = 0
    for schema_view in train_reader:
      i += len(schema_view.imagename)
    print(i)
def tensorflow_hello_world(dataset_url='file:///tmp/carbon_external_dataset'):
    # Example: tf_tensors will return tensors with dataset data
    with make_batch_carbon_reader(dataset_url) as reader:
        tensor = tf_tensors(reader)
        with tf.Session() as sess:
            # Because we are using make_batch_carbon_reader(), each read returns a batch of rows instead of a single row
            batched_sample = sess.run(tensor)
            print("id batch: {0}".format(batched_sample.id))

    # Example: use tf.data.Dataset API
    with make_batch_carbon_reader(dataset_url) as reader:
        dataset = make_pycarbon_dataset(reader)
        iterator = dataset.make_one_shot_iterator()
        tensor = iterator.get_next()
        with tf.Session() as sess:
            batched_sample = sess.run(tensor)
            print("id batch: {0}".format(batched_sample.id))
Example 3
def just_read_batch_obs(key, secret, endpoint, bucketname, prefix, download_path):
  path = 'file://' + download_files_from_obs_concurrently(key, secret, endpoint, bucketname, prefix, download_path)

  with make_batch_carbon_reader(path, key=key, secret=secret, endpoint=endpoint, num_epochs=1) as train_reader:
    i = 0
    for schema_view in train_reader:
      i += len(schema_view.imagename)
    print(i)
Example 4
def python_hello_world(dataset_url='file:///tmp/carbon_external_dataset'):
    # Reading data from a non-Pycarbon CarbonData store via pure Python
    with make_batch_carbon_reader(dataset_url,
                                  schema_fields=["id", "value1",
                                                 "value2"]) as reader:
        for schema_view in reader:
            # make_batch_carbon_reader() returns batches of rows instead of individual rows
            print("Batched read:\nid: {0} value1: {1} value2: {2}".format(
                schema_view.id, schema_view.value1, schema_view.value2))
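Each batch comes back as a namedtuple of column arrays (the schema_fields example further below checks schema_view._fields), so a batch can also be converted to a pandas DataFrame via _asdict(). A minimal sketch, assuming pandas is available and the selected fields are scalar columns; the function name is illustrative only.

import pandas as pd

def python_read_as_dataframe(dataset_url='file:///tmp/carbon_external_dataset'):
    # Sketch: collect every batch for one epoch into a single pandas DataFrame.
    frames = []
    with make_batch_carbon_reader(dataset_url, num_epochs=1,
                                  schema_fields=["id", "value1", "value2"]) as reader:
        for schema_view in reader:
            # _asdict() maps each selected field name to its batched column values.
            frames.append(pd.DataFrame(schema_view._asdict()))
    print(pd.concat(frames, ignore_index=True))
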
def just_read_batch(dataset_url='file:///tmp/benchmark_dataset'):
  values = [5]
  predicate = in_set(values, "id")
  with make_batch_carbon_reader(dataset_url, num_epochs=1, workers_count=16,
                                predicate=predicate) as train_reader:
    i = 0
    for schema_view in train_reader:
      assert all(sample_id == 5 for sample_id in schema_view.id)
      i += len(schema_view.id)
    print(i)
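Assuming in_set accepts a collection of values, as its name suggests, the same pattern extends to filtering on several ids. A sketch reusing the field name and dataset path from the example above; the function name is illustrative only.

def just_read_batch_filtered(dataset_url='file:///tmp/benchmark_dataset'):
  # Sketch: keep only the rows whose id falls in the given set of values.
  values = [2, 5, 8]
  predicate = in_set(values, "id")
  with make_batch_carbon_reader(dataset_url, num_epochs=1, workers_count=16,
                                predicate=predicate) as train_reader:
    i = 0
    for schema_view in train_reader:
      assert all(sample_id in values for sample_id in schema_view.id)
      i += len(schema_view.id)
    print(i)
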
Example 6
def just_read_batch(dataset_url='file:///tmp/benchmark_dataset'):
    with make_batch_carbon_reader(dataset_url,
                                  num_epochs=1,
                                  workers_count=16,
                                  schema_fields=["id",
                                                 "value1"]) as train_reader:
        i = 0
        for schema_view in train_reader:
            assert len(schema_view) == 2
            assert schema_view._fields == ('id', 'value1')
            i += len(schema_view.id)
        print(i)
def just_read_batch(dataset_url='file:///tmp/benchmark_dataset'):
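    # ROW_COUNT is assumed to be a module-level constant: the number of rows to read
    # between successive timing printouts (its value is not shown in this example).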
    with make_batch_carbon_reader(dataset_url, num_epochs=1) as train_reader:
        i = 0
        start = time.time()
        for schema_view in train_reader:
            for j in range(len(schema_view.id)):
                schema_view.id[j]
                i += 1
                if i % ROW_COUNT == 0:
                    end = time.time()
                    print("time is " + str(end - start))
                    start = end

        print(i)
Example 8
def just_read_batch(dataset_url='file:///tmp/benchmark_dataset'):
    with make_batch_carbon_reader(
            dataset_url,
            num_epochs=1,
            workers_count=16,
            shuffle_row_drop_partitions=5) as train_reader:
        result = list()
        i = 0
        for schema_view in train_reader:
            i += len(schema_view.id)
            for sample_id in schema_view.id:
                result.append(sample_id)
        print(i)
        print(result)
def test_generate(external_dataset):
  # Read from it using a plain reader
  with make_batch_carbon_reader(external_dataset.url) as reader:
    all_samples = list(reader)
  assert all_samples
def train_and_test(dataset_url, num_epochs, batch_size, evaluation_interval):
    """
  Train a model for training iterations with a batch size batch_size, printing accuracy every log_interval.
  :param dataset_url: The MNIST dataset url.
  :param num_epochs: The number of epochs to train for.
  :param batch_size: The batch size for training.
  :param evaluation_interval: The interval used to print the accuracy.
  :return:
  """

    with make_batch_carbon_reader(os.path.join(dataset_url, 'train'),
                                  num_epochs=num_epochs) as train_reader:
        with make_batch_carbon_reader(os.path.join(dataset_url, 'test'),
                                      num_epochs=num_epochs) as test_reader:
            # Create the model
            x = tf.placeholder(tf.float32, [None, 784])
            w = tf.Variable(tf.zeros([784, 10]))
            b = tf.Variable(tf.zeros([10]))
            y = tf.matmul(x, w) + b

            # Define loss and optimizer
            y_ = tf.placeholder(tf.int64, [None])

            # Define the loss function
            cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y_,
                                                                   logits=y)

            train_step = tf.train.GradientDescentOptimizer(0.5).minimize(
                cross_entropy)

            correct_prediction = tf.equal(tf.argmax(y, 1), y_)

            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

            train_dataset = make_pycarbon_dataset(train_reader) \
              .apply(tf.data.experimental.unbatch()) \
              .batch(batch_size) \
              .map(decode)

            train_iterator = train_dataset.make_one_shot_iterator()
            label, image = train_iterator.get_next()

            test_dataset = make_pycarbon_dataset(test_reader) \
              .apply(tf.data.experimental.unbatch()) \
              .batch(batch_size) \
              .map(decode)

            test_iterator = test_dataset.make_one_shot_iterator()
            test_label, test_image = test_iterator.get_next()

            # Train
            print(
                'Training model for {0} epochs with batch size {1} and evaluation interval {2}'
                .format(num_epochs, batch_size, evaluation_interval))

            i = 0
            with tf.Session() as sess:
                sess.run([
                    tf.local_variables_initializer(),
                    tf.global_variables_initializer(),
                ])

                try:
                    while True:
                        cur_label, cur_image = sess.run([label, image])

                        sess.run([train_step],
                                 feed_dict={
                                     x: cur_image,
                                     y_: cur_label
                                 })

                        if i % evaluation_interval == 0:
                            test_cur_label, test_cur_image = sess.run(
                                [test_label, test_image])
                            print(
                                'After {0} training iterations, the accuracy of the model is: {1:.2f}'
                                .format(
                                    i,
                                    sess.run(accuracy,
                                             feed_dict={
                                                 x: test_cur_image,
                                                 y_: test_cur_label
                                             })))
                        i += 1

                except tf.errors.OutOfRangeError:
                    print("Finish! the number is " + str(i))
Example 11
def pytorch_hello_world(dataset_url='file:///tmp/carbon_external_dataset'):
    with DataLoader(make_batch_carbon_reader(dataset_url)) as train_loader:
        sample = next(iter(train_loader))
        # Because we are using make_batch_carbon_reader(), each read returns a batch of rows instead of a single row
        print("id batch: {0}".format(sample['id']))