def get_config():
    basename = os.path.basename(__file__)
    logger.set_logger_dir(
        os.path.join('train_log', basename[:basename.rfind('.')]))

    # prepare dataset
    dataset_train = tp.BatchData(tp.dataset.Mnist('train'), 128)
    dataset_test = tp.BatchData(tp.dataset.Mnist('test'), 256, remainder=True)
    step_per_epoch = dataset_train.size()

    # prepare session
    sess_config = tp.get_default_sess_config()
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.5

    lr = tf.train.exponential_decay(
        learning_rate=1e-3,
        global_step=tp.get_global_step_var(),
        decay_steps=dataset_train.size() * 10,
        decay_rate=0.3, staircase=True, name='learning_rate')
    tf.scalar_summary('learning_rate', lr)

    return tp.TrainConfig(
        dataset=dataset_train,
        optimizer=tf.train.AdamOptimizer(lr),
        callbacks=Callbacks([
            StatPrinter(),
            ModelSaver(),
            InferenceRunner(dataset_test,
                            [ScalarStats('cost'), ClassificationError()])
        ]),
        session_config=sess_config,
        model=Model(),
        step_per_epoch=step_per_epoch,
        max_epoch=100,
    )

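# A minimal launch sketch, assuming the legacy tensorpack trainer API that is
# contemporaneous with the old-style TrainConfig above (in that era
# QueueInputTrainer consumed the whole config object). Checkpoint restore and
# argument parsing are omitted; this is an assumption, not the original
# author's driver code:
if __name__ == '__main__':
    config = get_config()
    tp.QueueInputTrainer(config).train()
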
def make_data_loaders(
    experiment_config: Dict[str, Any], hparams: Dict[str, Any]
) -> Tuple[Optional[tp.DataFlow], Optional[tp.DataFlow]]:
    """Provides training and validation data for model training."""
    download_dir = get_download_data_dir()
    training_dataflow = tp.BatchData(
        tp.dataset.Mnist("train", dir=download_dir), hparams["batch_size"])
    validation_dataflow = tp.BatchData(
        tp.dataset.Mnist("test", dir=download_dir), hparams["batch_size"])
    return training_dataflow, validation_dataflow

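# Hedged usage sketch: the hparams values are placeholders, and
# get_download_data_dir() must resolve in this project. Iteration follows
# tensorpack's DataFlow protocol (reset_state() before get_data()); each MNIST
# datapoint is [image, label], so a batched datapoint unpacks into two arrays:
train_df, val_df = make_data_loaders({}, {"batch_size": 64})
train_df.reset_state()
for images, labels in train_df.get_data():
    pass  # images: (64, 28, 28) floats, labels: (64,) ints
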
def setup_imagereader_dataflow():
    # extract parameters from config (args)
    image_path = str(config.imagereader_image)
    cutout_width = int(config.imagereader_cutout_width)
    cutout_height = int(config.imagereader_cutout_height)
    min_length = int(config.min_len)
    keylines_path = str(config.imagereader_keylines)
    use_right = bool(config.imagereader_keylines_use_right)

    # create ImageReader dataflow
    ds = LineDataImageReader(image_path=image_path,
                             cutout_width=cutout_width,
                             cutout_height=cutout_height,
                             min_length=min_length,
                             keylines_path=keylines_path,
                             use_right=use_right)

    # set cutout width and height for setup
    config.C_WIDTH = cutout_width
    config.C_HEIGHT = cutout_height

    # print debug information about cutouts
    _L.debug("Processed {} cutouts".format(len(ds)))
    _L.debug("Cutout minimum length: {}".format(min_length))
    _L.debug("Cutout size: {}x{}".format(cutout_width, cutout_height))

    # batch to max `BATCH_SIZE`
    ds = tp.BatchData(ds, config.BATCH_SIZE, remainder=True, use_list=False)
    if config.debug:
        ds = tp.PrintData(ds)
    return ds

def setup_npz_dataflow(base_dir, prefetch=True):
    """Setup data generator

    Returns:
        Dataflow
    """
    from cnn.modules.npy_dataflow import MyPrefetchDataZMQ as MyNpzPrefetchDataZMQ

    # get Data from Server or Files
    lds = LineDataNPZ(base_dir, bool(config.random_data), config.range)  # RNGDataFlow
    ds = lds
    if prefetch and (config.cmd == "train" or not config.return_results):
        # do that in a different process
        ds = MyNpzPrefetchDataZMQ(ds, nr_proc=1)
    # strip unnecessary tmp values
    ds = StripData(ds)
    # batch to max `BATCH_SIZE`
    ds = tp.BatchData(ds, config.BATCH_SIZE, remainder=True, use_list=False)
    if config.debug:
        ds = tp.PrintData(ds)
    # ds.client = lds.client
    return ds

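# Hypothetical call site (the directory is a placeholder, and the module-level
# `config` object must already be populated the way this project expects):
ds = setup_npz_dataflow("/path/to/npz_data", prefetch=False)
ds.reset_state()
for batch in ds.get_data():
    pass  # one datapoint per iteration, batched to config.BATCH_SIZE
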
def get_cifar(train_or_test, batch_size=None):
    # Get CIFAR data generator
    df = tp.dataset.Cifar10(train_or_test)
    if batch_size:
        df = tp.BatchData(df, batch_size)
    df.reset_state()
    ds = df.get_data()
    return ds

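# Example use of the generator returned above. Cifar10 datapoints are
# [image, label], so each batched datapoint unpacks into two arrays:
batches = get_cifar('train', batch_size=32)
images, labels = next(batches)  # images: (32, 32, 32, 3), labels: (32,)
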
def setup_dataflow(base_dir=None, range=None, rnd=None):
    """Setup data generator

    Returns:
        Dataflow
    """
    from cnn.modules.fio import FBytesIO
    import os

    client_or_folder = None
    if not base_dir:
        client_or_folder = Client(config.ip, config.port)
        (connected, answer) = client_or_folder.connect()
        assert not answer.error
        data = answer.data
    else:
        client_or_folder = base_dir
        welcome_file = os.path.join(base_dir, "-1")
        if not os.path.exists(welcome_file):
            _L.critical("The given folder {} doesn't seem to contain "
                        "expected data".format(base_dir))
            exit(1)
        with open(welcome_file, "rb") as f:
            data = f.read()

    bt = FBytesIO(data)
    it = bt.extract("c")
    # read away the message ID and colon
    while next(it) != b":":
        pass
    # extract cutout width and height for Setup
    cw, ch = bt.unpack("!II")
    config.C_HEIGHT = ch if ch else None
    config.C_WIDTH = cw if cw else None
    _L.debug("Cutout size: {}x{}".format(cw, ch))

    # get Data from Server or Files
    lds = LineData(client_or_folder, range, rnd)  # RNGDataFlow
    ds = lds
    if config.cmd == "train" or not config.return_results:
        # do that in a different process
        ds = MyPrefetchDataZMQ(ds, nr_proc=1)
    # strip unnecessary tmp values
    ds = StripData(ds)
    # batch to max `BATCH_SIZE`
    ds = tp.BatchData(ds, config.BATCH_SIZE, remainder=True, use_list=False)
    if config.debug:
        ds = tp.PrintData(ds)
    ds.client = lds.client
    return ds

def get_data(data_dir, batch, vob_dict_path, POS_filter, Windsize=3, stride=1,
             is_train=False, nV=20, nF=300):
    # build the graph dataflow from the raw corpus
    ds = GraphDataFlow(data_dir, vob_dict_path, POS_filter, Windsize, stride,
                       is_train, nV, nF)
    # keep the incomplete final batch only at test time
    ds = tp.BatchData(ds, batch, remainder=not is_train)
    # parallelize with 10 worker processes during training
    ds = tp.PrefetchDataZMQ(ds, 10) if is_train else ds
    return ds

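# Hypothetical usage; every path and parameter value here is a placeholder:
train_ds = get_data("./data", batch=16, vob_dict_path="./vocab.pkl",
                    POS_filter=None, is_train=True)
train_ds.reset_state()  # required before iterating; also starts the ZMQ workers
for dp in train_ds.get_data():
    pass
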
def main(mode=mode):
    """The main function"""
    if mode == "train":
        # Create dataset and iterator
        training_iterator = tp.dataset.Mnist('train')
        training_iterator = tp.BatchData(training_iterator, batch_size)

        # Build computation graph
        inputs_img, inputs_z, d_loss, g_loss = make_graph()
        d_train_op, g_train_op = train_op(d_loss, g_loss, lr_d=lr_d, lr_g=lr_g)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # Run training
            samples, d_losses, g_losses = run_training(
                sess, training_iterator, d_train_op, g_train_op,
                inputs_img, inputs_z, d_loss, g_loss, n_epochs=NEPOCHS)

        print('Done training!')
        _ = plot_training_curves(d_losses, g_losses, "losses.png")
        # _ = plot_one_set_of_samples(samples, -1, "samples_training_epoch%d.png" % NEPOCHS)
        _ = plot_training_samples_improvement(samples, "samples_training_progress.png")

    elif mode == "gen":
        _, inputs_z, _, _ = make_graph()
        z = np.random.uniform(-1, 1, size=(9, z_dim))
        with tf.Session() as sess:
            samples = generate_samples(sess, inputs_z, z)
        _ = plot_one_set_of_samples([samples], 0, "samples_gen.png")

    return True

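# Hypothetical entry point; assumes the mode is selected on the command line
# rather than via the module-level default:
if __name__ == "__main__":
    import sys
    main(mode=sys.argv[1] if len(sys.argv) > 1 else "train")
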
def build_validation_dataflow(self) -> tp.DataFlow:
    return tp.BatchData(
        tp.dataset.Mnist("test", dir=self.download_directory),
        self.context.get_per_slot_batch_size(),
    )
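
# For orientation, a sketch of the matching training-side hook, assuming the
# Determined TensorpackTrial interface this method appears to implement, where
# build_training_dataflow is the counterpart (that pairing is an assumption):
def build_training_dataflow(self) -> tp.DataFlow:
    return tp.BatchData(
        tp.dataset.Mnist("train", dir=self.download_directory),
        self.context.get_per_slot_batch_size(),
    )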