Example #1
def get_config():
    basename = os.path.basename(__file__)
    logger.set_logger_dir(
        os.path.join('train_log', basename[:basename.rfind('.')]))

    # prepare dataset
    dataset_train = tp.BatchData(tp.dataset.Mnist('train'), 128)
    dataset_test = tp.BatchData(tp.dataset.Mnist('test'), 256, remainder=True)
    step_per_epoch = dataset_train.size()

    # prepare session
    sess_config = tp.get_default_sess_config()
    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.5

    lr = tf.train.exponential_decay(
        learning_rate=1e-3,
        global_step=tp.get_global_step_var(),
        decay_steps=dataset_train.size() * 10,
        decay_rate=0.3, staircase=True, name='learning_rate')
    tf.scalar_summary('learning_rate', lr)

    return tp.TrainConfig(
        dataset=dataset_train,
        optimizer=tf.train.AdamOptimizer(lr),
        callbacks=Callbacks([
            StatPrinter(),
            ModelSaver(),
            InferenceRunner(dataset_test,
                [ScalarStats('cost'), ClassificationError() ])
        ]),
        session_config=sess_config,
        model=Model(),
        step_per_epoch=step_per_epoch,
        max_epoch=100,
    )
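For reference, a TrainConfig like the one above is normally handed straight to a tensorpack trainer. The lines below are only a sketch, assuming the old-style trainer API that matches this config (a trainer constructed directly from the TrainConfig); they are not part of the original example.

# Sketch only: feeding the config above into a trainer, assuming the
# old tensorpack API where trainers take a TrainConfig in the constructor.
if __name__ == '__main__':
    config = get_config()
    tp.SimpleTrainer(config).train()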
Example #2
def make_data_loaders(
    experiment_config: Dict[str, Any], hparams: Dict[str, Any]
) -> Tuple[Optional[tp.DataFlow], Optional[tp.DataFlow]]:
    """Provides training and validation data for model training."""
    download_dir = get_download_data_dir()
    training_dataflow = tp.BatchData(
        tp.dataset.Mnist("train", dir=download_dir), hparams["batch_size"])
    validation_dataflow = tp.BatchData(
        tp.dataset.Mnist("test", dir=download_dir), hparams["batch_size"])

    return training_dataflow, validation_dataflow
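As a quick illustration (everything here besides make_data_loaders itself is made up for the example), the two DataFlows it returns are consumed like any other tensorpack DataFlow:

# Hypothetical usage; the experiment_config/hparams values are illustrative only.
train_df, val_df = make_data_loaders(experiment_config={}, hparams={"batch_size": 64})
train_df.reset_state()
for images, labels in train_df:  # recent tensorpack DataFlows are directly iterable
    break                        # each element is one batch produced by BatchData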
Example #3
def setup_imagereader_dataflow():
    # extract parameters from config (args)
    image_path = str(config.imagereader_image)
    cutout_width = int(config.imagereader_cutout_width)
    cutout_height = int(config.imagereader_cutout_height)
    min_length = int(config.min_len)
    keylines_path = str(config.imagereader_keylines)
    use_right = bool(config.imagereader_keylines_use_right)

    # create ImageReader dataflow
    ds = LineDataImageReader(image_path=image_path, cutout_width=cutout_width, cutout_height=cutout_height,
                             min_length=min_length, keylines_path=keylines_path, use_right=use_right)

    # set cutout width and height for setup
    config.C_WIDTH = cutout_width
    config.C_HEIGHT = cutout_height

    # print debug information about cutouts
    _L.debug("Processed {} cutouts".format(len(ds)))
    _L.debug("Cutout minimum length: {}".format(min_length))
    _L.debug("Cutout size: {}x{}".format(cutout_width, cutout_height))

    # batch to max 'BATCH_SIZE'
    ds = tp.BatchData(ds, config.BATCH_SIZE, remainder=True, use_list=False)
    if config.debug:
        ds = tp.PrintData(ds)

    return ds
Example #4
def setup_npz_dataflow(base_dir, prefetch=True):
    """Setup data generator

    Returns: Dataflow
    """
    from cnn.modules.npy_dataflow import MyPrefetchDataZMQ as MyNpzPrefetchDataZMQ

    # get Data from Server or Files
    lds = LineDataNPZ(base_dir, bool(config.random_data), config.range)  # RNGDataFlow
    ds = lds

    if prefetch and (config.cmd == "train" or not config.return_results):
        # do that in a different process
        ds = MyNpzPrefetchDataZMQ(ds, nr_proc=1)

    # strip unnecessary tmp values
    ds = StripData(ds)

    # batch to max `BATCH_SIZE`
    ds = tp.BatchData(ds, config.BATCH_SIZE, remainder=True, use_list=False)
    if config.debug:
        ds = tp.PrintData(ds)

    #ds.client = lds.client

    return ds
Example #5
def get_cifar(train_or_test, batch_size=None):
    # Get CIFAR data generator
    df = tp.dataset.Cifar10(train_or_test)
    if batch_size:
        df = tp.BatchData(df, batch_size)
    df.reset_state()
    ds = df.get_data()
    return ds
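Note that get_cifar returns the underlying generator rather than the DataFlow object, so a caller pulls batches with next(). A minimal sketch follows; the batch size and shapes are illustrative, not from the original.

# Sketch only: consuming the generator returned by get_cifar().
gen = get_cifar('train', batch_size=32)
images, labels = next(gen)  # BatchData yields [images, labels] per batch
# images has shape (32, 32, 32, 3); labels has shape (32,)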
Example #6
def setup_dataflow(base_dir=None, range=None, rnd=None):
    """Setup data generator

    Returns: Dataflow
    """
    from cnn.modules.fio import FBytesIO
    import os

    client_or_folder = None
    if not base_dir:
        client_or_folder = Client(config.ip, config.port)
        (connected, answer) = client_or_folder.connect()
        assert not answer.error
        data = answer.data

    else:
        client_or_folder = base_dir
        welcome_file = os.path.join(base_dir, "-1")
        if not os.path.exists(welcome_file):
            _L.critical("The given folder {} \
            doesn't seem to contain expected data".format(base_dir))
            exit(1)
        with open(welcome_file, "rb") as f:
            data = f.read()

    bt = FBytesIO(data)
    it = bt.extract("c")
    # read away the message ID and colon
    while next(it) != b":":
        pass

    # extract cutout width and height for Setup
    cw, ch = bt.unpack("!II")
    config.C_HEIGHT = ch if ch else None
    config.C_WIDTH = cw if cw else None

    _L.debug("Cutout size: {}x{}".format(cw, ch))

    # get Data from Server or Files
    lds = LineData(client_or_folder, range, rnd)  # RNGDataFlow
    ds = lds

    if config.cmd == "train" or not config.return_results:
        # do that in a different process
        ds = MyPrefetchDataZMQ(ds, nr_proc=1)

    # strip unnecessary tmp values
    ds = StripData(ds)
    # batch to max `BATCH_SIZE`
    ds = tp.BatchData(ds, config.BATCH_SIZE, remainder=True, use_list=False)

    if config.debug:
        ds = tp.PrintData(ds)

    ds.client = lds.client

    return ds
Example #7
def get_data(data_dir,
             batch,
             vob_dict_path,
             POS_filter,
             Windsize=3,
             stride=1,
             is_train=False,
             nV=20,
             nF=300):
    ds = GraphDataFlow(data_dir, vob_dict_path, POS_filter, Windsize, stride,
                       is_train, nV, nF)
    ds = tp.BatchData(ds, batch, remainder=not is_train)
    ds = tp.PrefetchDataZMQ(ds, 10) if is_train else ds
    return ds
Example #8
def main(mode=mode):
    """The main function"""
    if mode == "train":
        # Create dataset and iterator
        training_iterator = tp.dataset.Mnist('train')
        training_iterator = tp.BatchData(training_iterator, batch_size)

        # Build computation graph
        inputs_img, inputs_z, d_loss, g_loss = make_graph()
        d_train_op, g_train_op = train_op(d_loss, g_loss, lr_d=lr_d, lr_g=lr_g)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # Run training
            samples, d_losses, g_losses = run_training(sess,
                                                       training_iterator,
                                                       d_train_op,
                                                       g_train_op,
                                                       inputs_img,
                                                       inputs_z,
                                                       d_loss,
                                                       g_loss,
                                                       n_epochs=NEPOCHS)
            print('Done training!')
        _ = plot_training_curves(d_losses, g_losses, "losses.png")
        #_ = plot_one_set_of_samples(samples, -1, "samples_training_epoch%d.png"%NEPOCHS)
        _ = plot_training_samples_improvement(samples,
                                              "samples_training_progress.png")

    elif mode == "gen":
        _, inputs_z, _, _ = make_graph()

        z = np.random.uniform(-1, 1, size=(9, z_dim))

        with tf.Session() as sess:
            samples = generate_samples(sess, inputs_z, z)

        _ = plot_one_set_of_samples([samples], 0, "samples_gen.png")

    return True
Example #9
def build_validation_dataflow(self) -> tp.DataFlow:
    return tp.BatchData(
        tp.dataset.Mnist("test", dir=self.download_directory),
        self.context.get_per_slot_batch_size(),
    )