"""Minimal DeepJetCore example: open a converted DataCollection, carve off a
validation split, and prepare the ragged-aware batch generator."""
from DeepJetCore.DataCollection import DataCollection
import sys

# The path to the converted DeepJetCore dataset is the first CLI argument.
djdc_path = sys.argv[1]
train_data = DataCollection(djdc_path)

# Carve off a validation set; it can be used exactly like train_data.
# NOTE(review): the original comment says this "splits off 10%" while passing
# 0.9 — confirm against DataCollection.split() which fraction is kept vs returned.
val_data = train_data.split(0.9)

# Batch size. For data that is ragged in dimension 1 (see convert options) this
# is the *maximum* number of elements per batch, distributed over whole
# examples: e.g. with examples of 50, 48 and 30 elements and a budget of 100,
# the first batch holds only the first two examples (99 elements in total).
# This caps memory use and helps avoid OOM errors during training.
train_data.setBatchSize(100)
print("batch size: 100")

# Prepare the generator once; the two calls below are repeated per epoch.
train_data.invokeGenerator()
# ... loop over epochs here ...
train_data.generator.shuffleFilelist()
train_data.generator.prepareNextEpoch()

# For ragged data the batch count can differ from epoch to epoch.
nbatches = train_data.generator.getNBatches()
def train(self):
    """Run the training loop for ``self.train_for_iterations`` iterations.

    Pulls batches either from TFRecord queue runners
    (``self.use_tf_records``) or from a DeepJetCore DataCollection
    generator, runs one optimizer step per batch, logs the loss, writes
    TensorBoard summaries, and checkpoints the model (plus a ``.txt``
    sidecar holding the iteration counter) every
    ``self.save_after_iterations`` iterations. Resumes from the last
    checkpoint unless ``self.from_scratch`` is set.
    """
    placeholder_input, placeholder_output = self.model.get_placeholders()
    graph_output = self.model.get_compute_graphs()
    graph_loss = self.model.get_losses()
    graph_optimiser = self.model.get_optimizer()
    graph_summary = self.model.get_summary()
    if self.from_scratch:
        self.clean_summary_dir()
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        if self.use_tf_records:
            # Queue-runner based input pipeline (TF1 style).
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            record_batch_input, record_batch_target = self.get_record_placeholders()
        else:
            input_data = self.config['train_data_path']
            train_data = DataCollection()
            train_data.readFromFile(input_data)
            # NOTE(review): split() is called twice on the same collection
            # (0.1 then 0.9) — confirm against DataCollection.split() that
            # this yields the intended 10/90 partition.
            val_data = train_data.split(0.1)
            train_data = train_data.split(0.9)
            train_data.setBatchSize(self.batch_size)
            val_data.setBatchSize(self.batch_size)
            # FIX: the validation generator previously iterated train_data
            # (copy-paste bug); it must come from val_data. It is still
            # unused below — the validation loop presumably lives elsewhere.
            val_data_generator = val_data.generator()
            train_data_generator = train_data.generator()
        summary_writer = tf.summary.FileWriter(self.summary_path, sess.graph)
        if not self.from_scratch:
            # Resume: restore weights and the persisted iteration counter.
            self.saver_all.restore(sess, self.model_path)
            print("\n\nINFO: Loading model\n\n")
            with open(self.model_path + '.txt', 'r') as f:
                iteration_number = int(f.read())
        else:
            iteration_number = 0
        print("Starting iterations")
        while iteration_number < self.train_for_iterations:
            if self.use_tf_records:
                batch_input, batch_target = sess.run(
                    [record_batch_input, record_batch_target])
                # Decode the serialized byte strings into dense arrays.
                # NOTE(review): np.fromstring is deprecated in favour of
                # np.frombuffer; kept as-is because frombuffer requires
                # bytes-like input and changing it may alter behaviour here.
                batch_input = [
                    np.fromstring(''.join(i)).reshape(
                        13, 13, int(self.config['num_layers']),
                        int(self.config['num_channels'])) for i in batch_input
                ]
                batch_target = [
                    np.fromstring(''.join(i)).reshape(
                        13, 13, int(self.config['num_layers']))
                    for i in batch_target
                ]
            else:
                # next(gen) instead of gen.next(): identical on Python 2,
                # and also works on Python 3.
                batch_input, batch_target, _ = next(train_data_generator)
                # The generator yields an extra leading batch axis; drop it.
                batch_input = np.squeeze(batch_input, axis=0)
                batch_target = np.squeeze(batch_target, axis=0)
            _, eval_loss, _, eval_summary = sess.run(
                [graph_output, graph_loss, graph_optimiser, graph_summary],
                feed_dict={
                    placeholder_input: batch_input,
                    placeholder_output: batch_target
                })
            print("Iteration %4d: loss %0.5f" % (iteration_number, eval_loss))
            iteration_number += 1
            summary_writer.add_summary(eval_summary, iteration_number)
            if iteration_number % self.save_after_iterations == 0:
                print("\n\nINFO: Saving model\n\n")
                self.saver_all.save(sess, self.model_path)
                # Persist the iteration counter so training can resume.
                with open(self.model_path + '.txt', 'w') as f:
                    f.write(str(iteration_number))
        if self.use_tf_records:
            # Stop the queue-runner threads and wait for them to finish.
            coord.request_stop()
            coord.join(threads)