def __init__(self, config: Config, shape):
        """Assemble the SELU convolutional classifier graph.

        Three convolution stages (2x32, 2x64 and 4x96 filters, kernel 3,
        stride 1), each closed by a max-pooling with k=2/s=2, feed two
        128-unit dense layers with dropout and a final linear layer with
        ``n_classes`` units.

        :param config: configuration object; ``n_classes`` (default 13) and
            ``n_channels`` (default 2) are read from it
        :param shape: indexable whose element 0 is the input height and
            element 1 the input width
        """
        height, width = shape[0], shape[1]

        num_classes = config.get_value("n_classes", 13)
        num_channels = config.get_value("n_channels", 2)

        activation = selu
        initializer = scaled_elu_initialization

        # Graph inputs. The creation order is kept on purpose because the
        # third placeholder is unnamed and receives an auto-generated name.
        features = tf.placeholder(tf.float32,
                                  [None, height, width, num_channels],
                                  name="Features")
        labels = tf.placeholder(tf.float32, [None, num_classes],
                                name="Labels")
        dropout_in = tf.placeholder(tf.float32)

        print(features.get_shape())

        net = []
        previous = features

        # Convolution stages: (number of filters, number of conv layers)
        conv_id = 0
        stages = ((32, 2), (64, 2), (96, 4))
        for pool_id, (n_filters, n_convs) in enumerate(stages, start=1):
            for _ in range(n_convs):
                conv_id += 1
                net.append(conv(previous, initializer, activation,
                                k=3, s=1, out=n_filters, id=conv_id))
                previous = net[-1]
            net.append(maxpool(previous, k=2, s=2, id=pool_id))
            previous = net[-1]

        # Dense layers; only the first one flattens its input
        for dense_id, needs_flatten in ((1, True), (2, False)):
            net.append(fc(net[-1], initializer, activation,
                          units=128, flatten=needs_flatten, id=dense_id))
            net.append(dropout(net[-1], dropout_in, activation, id=dense_id))

        # Linear output layer
        net.append(fc(net[-1], initializer, tf.identity,
                      units=num_classes, flatten=False, id=3))

        # Publish the handles used from outside
        self.X = features
        self.y_ = labels
        self.dropout = dropout_in
        self.output = net[-1].get_output()
        self.layers = net
def main():
    """Run a training job while mirroring stdout into ``log.txt``.

    A time-stamped working directory is created below
    ``config.working_dir/config.specs``. While training runs, ``sys.stdout``
    is replaced by a ``Tee`` that also receives the opened log file; the
    original stream is restored before the log file is closed, so nothing
    printed afterwards is routed to a closed file.
    """
    config = Config()
    working_dir = os.path.join(config.working_dir, config.specs)
    working_dir = os.path.join(working_dir,
                               dt.datetime.now().strftime("%Y-%m-%dT%H-%M-%S"))
    make_sure_path_exists(working_dir)

    original_stdout = sys.stdout
    with open(os.path.join(working_dir, 'log.txt'), 'a') as logfile:
        # Same Tee arguments as before; only the later restore is new.
        sys.stdout = Tee(original_stdout, logfile, original_stdout)
        try:
            bl_config = config.get_value('bl_config')

            logger.configure(os.path.join(working_dir, 'baselines'),
                             ['tensorboard', 'log', 'stdout'])
            train(env_id=bl_config['env'],
                  num_timesteps=bl_config['num_timesteps'],
                  policy=config.get_value('policy'),
                  working_dir=working_dir,
                  config=config)
        finally:
            # Flush while the log file is still open, then always put the
            # original stream back, even if training or flushing failed.
            try:
                sys.stdout.flush()
            finally:
                sys.stdout = original_stdout
# Example #3
# 0
def main(_):
    """Extract activations from a restored checkpoint and dump them as text.

    Restores ``./model/checkpoint-752500.ckpt``, runs the training and the
    validation loader through the graph and stores the values of tensor
    ``FC-2_2/selu/mul_1:0``:

    * ``traindata.txt``: one row per training sample, filled in loader order
    * ``validationdata.txt``: one row per validation minibatch, averaged over
      the rows that minibatch produced

    :param _: unused
    :return: activations of the last training minibatch, or ``None`` when the
        training loader yielded nothing
    """
    config = Config()
    np.random.seed(config.get_value("random_seed", 12345))

    # PARAMETERS
    # NOTE(review): n_epochs, batchsize, n_classes and initial_val are not
    # used below; the reads are kept in case get_value has side effects.
    n_epochs = config.get_value("epochs", 100)
    batchsize = config.get_value("batchsize", 8)
    n_classes = config.get_value("n_classes", 3)
    dropout = config.get_value("dropout", 0.25)  # TODO
    num_threads = config.get_value("num_threads", 0)    # zzxue
    initial_val = config.get_value("initial_val", True)

    # READER, LOADER
    readers = invoke_dataset_from_config(config)
    reader_train = readers["train"]
    reader_val = readers["val"]
    train_loader = torch.utils.data.DataLoader(reader_train, batch_size=config.batchsize, shuffle=True,
                                               num_workers=num_threads)
    val_loader = torch.utils.data.DataLoader(reader_val, batch_size=1, shuffle=False, num_workers=num_threads)

    # Output buffers; the sizes are fixed to the dataset this script was
    # written for (rows that are never filled stay zero).
    feats = np.zeros([24080, 256])
    vafeats = np.zeros([2625, 256])

    feature = None
    with tf.Session() as sess:
        # Path of the saved model
        saver = tf.train.import_meta_graph("./model/checkpoint-752500.ckpt.meta")
        saver.restore(sess, "./model/checkpoint-752500.ckpt")
        graph = tf.get_default_graph()

        # Input placeholder
        x_holder = graph.get_tensor_by_name("Features:0")
        # Tensor whose values are extracted as features
        fc3_features = graph.get_tensor_by_name("FC-2_2/selu/mul_1:0")
        # NOTE(review): the configured dropout value is fed here during
        # feature extraction as well - confirm this is intended.
        keep_prob = graph.get_tensor_by_name("Placeholder:0")

        # Rows are written at a running offset, so a short final minibatch
        # or a batch size other than 16 no longer breaks the slice assignment.
        row = 0
        for mb in train_loader:
            feature = sess.run(fc3_features,
                               feed_dict={x_holder: mb['input'].numpy(), keep_prob: dropout})
            n_rows = feature.shape[0]
            feats[row:row + n_rows, :] = feature
            row += n_rows

        for vbi, vmb in enumerate(val_loader):
            valfeature = sess.run(fc3_features,
                                  feed_dict={x_holder: vmb['input'].squeeze().numpy(), keep_prob: dropout})
            # Column-wise mean over all rows of this minibatch
            vafeats[vbi, :] = np.atleast_2d(valfeature).mean(axis=0)

    np.savetxt("traindata.txt", feats)
    np.savetxt("validationdata.txt", vafeats)
    return feature
    def __init__(self, config: Config, shape):
        """Build a SELU conv net that classifies from multi-scale averages.

        Three convolution blocks are stacked; the global averages of all
        three block outputs are concatenated and passed through two dense
        layers with dropout and a final linear layer.

        Values read from ``config``:

        * ``n_classes`` (default 3): units of the output layer
        * ``n_channels`` (default 2): channels of the input placeholder
        * ``n_full1`` (default 256): units of the first dense layer
        * ``n_full2`` (default: value of ``n_full1``): units of the second
          dense layer

        :param config: configuration object providing ``get_value``
        :param shape: indexable whose element 0 is the input height and
            element 1 the input width
        """
        width = shape[1]
        height = shape[0]

        output_units = config.get_value("n_classes", 3)
        n_channels = config.get_value("n_channels", 2)

        activation = selu
        weight_init = scaled_elu_initialization

        n_full1 = config.get_value("n_full1", 256)
        # The second dense layer used to be read from the "n_full1" key as
        # well (copy-paste slip). Falling back to n_full1 keeps the old
        # behaviour for configs that do not define "n_full2".
        n_full2 = config.get_value("n_full2", n_full1)

        # tf Graph input, compatible to DenseNet
        X = tf.placeholder(tf.float32, [None, height, width, n_channels],
                           name="Features")
        y_ = tf.placeholder(tf.float32, [None, output_units], name="Labels")
        d = tf.placeholder(tf.float32)

        layers = list()

        # Block 1: strided convolution followed by max-pooling
        layers.append(conv(X, weight_init, activation, k=3, s=2, out=32, id=1))
        layers.append(maxpool(layers[-1], k=2, s=2, id=1))
        blk1 = layers[-1]
        blk1_bmp = layers[-2]  # block 1 before max-pooling
        print("Block 1: {}".format(blk1.get_output_shape()))

        # Block 2: one strided and three unstrided convolutions, max-pooling
        layers.append(
            conv(layers[-1], weight_init, activation, k=3, s=2, out=64, id=2))
        layers.append(
            conv(layers[-1], weight_init, activation, k=3, s=1, out=64, id=3))
        layers.append(
            conv(layers[-1], weight_init, activation, k=3, s=1, out=64, id=4))
        layers.append(
            conv(layers[-1], weight_init, activation, k=3, s=1, out=64, id=5))
        layers.append(maxpool(layers[-1], k=2, s=2, id=2))
        blk2 = layers[-1]
        blk2_bmp = layers[-2]  # block 2 before max-pooling
        print("Block 2: {}".format(blk2.get_output_shape()))

        # Block 3: three unstrided convolutions, no pooling
        layers.append(
            conv(layers[-1], weight_init, activation, k=3, s=1, out=128, id=6))
        layers.append(
            conv(layers[-1], weight_init, activation, k=3, s=1, out=128, id=7))
        layers.append(
            conv(layers[-1], weight_init, activation, k=3, s=1, out=128, id=8))
        blk3 = layers[-1]
        print("Block 3: {}".format(blk3.get_output_shape()))

        # global average pooling of every block output
        layers.append(global_average(blk1, id=1))
        layers.append(global_average(blk2, id=2))
        layers.append(global_average(blk3, id=3))

        # concat the three averages
        layers.append(ConcatLayer(layers[-3:], name="ConcatAverage"))

        print("Concat: {}".format(layers[-1].get_output_shape()))

        # FC
        layers.append(
            fc(layers[-1],
               weight_init,
               activation,
               n_full1,
               flatten=True,
               id=1))
        layers.append(dropout(layers[-1], d, activation, 2))
        print("FC 1: {}".format(layers[-1].get_output_shape()))

        layers.append(
            fc(layers[-1],
               weight_init,
               activation,
               n_full2,
               flatten=False,
               id=2))
        layers.append(dropout(layers[-1], d, activation, 3))
        print("FC 2: {}".format(layers[-1].get_output_shape()))

        layers.append(
            fc(layers[-1],
               weight_init,
               tf.identity,
               output_units,
               flatten=False,
               id=3))

        # publish
        self.X = X
        self.y_ = y_
        self.dropout = d
        self.blk1_bmp = blk1_bmp
        self.blk2_bmp = blk2_bmp
        self.blk3 = blk3
        # The list ends with fc1, dropout, fc2, dropout, fc3
        self.fc1 = layers[-5]
        self.fc2 = layers[-3]
        self.out = layers[-1]
        self.layers = layers
        self.output = layers[-1].get_output()
    def __init__(self, config: Config, shape):
        """Build the ReLU conv net with a NoisyAND pooling head.

        The input is average-pooled, passed through five 3x3 convolution
        stages (with max-pooling and dropout), a 1x1 convolution with 1000
        filters and a 1x1 linear convolution with ``n_classes`` filters. On
        top of that a NoisyAND output (``self.output_nand``) and a linear
        dense output (``self.output``) are created.

        :param config: configuration object; ``n_classes`` (default 13) and
            ``a`` (default 5, initial value of the NoisyAND variable ``a``)
            are read from it
        :param shape: indexable whose element 0 is the input height and
            element 1 the input width
        """
        height, width = shape[0], shape[1]

        num_classes = config.get_value("n_classes", 13)
        slope_init = config.get_value('a', 5)

        relu = tf.nn.relu
        conv_init = weight_xavier_conv2d

        # Graph inputs
        features = tf.placeholder(tf.float32, [None, height, width, 2],
                                  name="Features")
        labels = tf.placeholder(tf.float32, [None, num_classes],
                                name="Labels")
        drop_switch = tf.placeholder(tf.float32)

        def switched_rate(rate):
            # 0 while the fed dropout value is 0, the fixed rate otherwise
            is_off = tf.equal(drop_switch, tf.constant(0, dtype=tf.float32))
            return tf.cond(is_off,
                           lambda: tf.constant(0, dtype=tf.float32),
                           lambda: tf.constant(rate, dtype=tf.float32))

        rate_conv = switched_rate(0.2)
        rate_head = switched_rate(0.5)

        print(features.get_shape())

        net = []
        net.append(avgpool(features, k=3, s=2, id=1))

        # First stage has two convolutions before pooling
        net.append(conv(net[-1], conv_init, relu, k=3, s=1, out=32, id=1))
        net.append(conv(net[-1], conv_init, relu, k=3, s=1, out=64, id=2))
        net.append(maxpool(net[-1], k=3, s=2, id=1))
        net.append(dropout(net[-1], rate_conv, relu, id=1))

        # Remaining stages: one convolution, max-pooling, dropout
        for conv_id, n_filters in ((3, 64), (4, 128), (5, 128)):
            stage_id = conv_id - 1
            net.append(conv(net[-1], conv_init, relu,
                            k=3, s=1, out=n_filters, id=conv_id))
            net.append(maxpool(net[-1], k=3, s=2, id=stage_id))
            net.append(dropout(net[-1], rate_conv, relu, id=stage_id))

        net.append(conv(net[-1], conv_init, relu, k=1, s=1, out=1000, id=6))
        net.append(dropout(net[-1], rate_head, relu, id=5))

        # Intermediate per-position output, one channel per class
        net.append(conv(net[-1], conv_init, tf.identity,
                        k=1, s=1, out=num_classes, id=7))

        # NoisyAND pooling over the two spatial axes
        with tf.variable_scope('NoisyAND'):
            slope = tf.get_variable(
                name='a',
                shape=[1],
                initializer=tf.constant_initializer(slope_init),
                trainable=False)
            offset = tf.get_variable(
                name='b',
                shape=[1, num_classes],
                initializer=tf.constant_initializer(0.0))
            offset = tf.clip_by_value(offset, 0.0, 1.0)
            mean_activation = tf.reduce_mean(
                tf.nn.sigmoid(net[-1].get_output()), axis=[1, 2])
            numerator = (tf.nn.sigmoid(slope * (mean_activation - offset))
                         - tf.nn.sigmoid(-slope * offset))
            denominator = (tf.sigmoid(slope * (1 - offset))
                           - tf.sigmoid(-slope * offset))
            noisy_and = numerator / denominator

        # Dense output layer on top of the intermediate output
        dense_init = tf.contrib.layers.xavier_initializer(uniform=False,
                                                          seed=None,
                                                          dtype=tf.float32)
        net.append(fc(net[-1], dense_init, tf.identity,
                      units=num_classes, flatten=False, id=1))

        # Publish the handles used from outside
        self.X = features
        self.y_ = labels
        self.dropout = drop_switch
        self.output_nand = noisy_and
        self.output = net[-1].get_output(prev_layers=[net[-2]])
        self.layers = net
def main(_):
    """Train and evaluate the TeLL model on bouncing-MNIST batches.

    Sets up data handlers and a TeLL session, builds the loss (blurred
    cross-entropy when ``blur_filter_size`` is configured, image
    cross-entropy otherwise), an Adam update and a streaming mean-IoU metric,
    then trains until ``step`` reaches ``iterations``. Every ``display_step``
    steps ten test batches are evaluated; after training 100 test batches are
    evaluated and one batch of predictions is written as PNG files to the
    result directory.

    :param _: unused
    """
    # Seed both the global numpy RNG and a dedicated generator
    np.random.seed(0)
    rng = np.random.RandomState(seed=0)

    config = Config()

    #
    # Load Data
    #
    with Timer(name="Load data"):
        training_data = BouncingMNISTDataHandler(
            config, config.mnist_train_images, config.mnist_train_labels, rng)
        test_data = BouncingMNISTDataHandler(
            config, config.mnist_test_images, config.mnist_test_labels, rng)

    # Shapes handed to the model: (batch, frames, height, width, 1) and
    # (batch, frames, height, width)
    dataset = DataSet((config.batch_size, config.num_frames, config.image_size, config.image_size, 1),
                      (config.batch_size, config.num_frames, config.image_size, config.image_size))

    # Create new TeLL session with two summary writers
    tell = TeLLSession(config=config, summaries=["train", "validation"], model_params={"dataset": dataset})
    
    # Get some members from the session for easier usage
    session = tell.tf_session
    summary_writer_train, summary_writer_validation = tell.tf_summaries["train"], tell.tf_summaries["validation"]
    model = tell.model
    workspace, config = tell.workspace, tell.config
    
    # Parameters
    learning_rate = config.get_value("learning_rate", 1e-3)
    iterations = config.get_value("iterations", 1000)
    batch_size = config.get_value("batch_size", 256)
    display_step = config.get_value("display_step", 10)
    calc_statistics = config.get_value("calc_statistics", False)
    blur_filter_size = config.get_value("blur_filter_size", None)

    # NOTE(review): not used anywhere below in this function
    training_summary_tensors = OrderedDict()

    # Define loss and optimizer
    #with tf.name_scope("Cost"):
    #    sem_seg_loss, _ = image_crossentropy(pred=model.output, target=model.y_,
    #                                         calc_statistics=calc_statistics, reduce_by="sum")
    #    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(sem_seg_loss)
    #    tf.summary.scalar("Loss", sem_seg_loss)

    # Evaluate model
    # NOTE(review): not used anywhere below in this function
    validation_summary_tensors = OrderedDict()

    # Loss, update step and IoU metric share the 'Cost' name scope
    with tf.name_scope('Cost'):
        # Fed with 3.5 in every session.run call below
        blur_sampling_range = tf.placeholder(tf.float32)

        if blur_filter_size is not None:
            sem_seg_loss = blurred_cross_entropy(output=model.output, target=model.y_,
                                                 filter_size=blur_filter_size,
                                                 sampling_range=blur_sampling_range)
        else:
            sem_seg_loss, _ = image_crossentropy(pred=model.output, target=model.y_,
                                                 reduce_by="mean", calc_statistics=calc_statistics)
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(sem_seg_loss)
        # Class prediction is the arg-max over axis 4 of the model output
        iou, iou_op = tf.contrib.metrics.streaming_mean_iou(
            predictions=tf.squeeze(tf.arg_max(model.output, 4)),
            labels=tf.squeeze(model.y_),
            num_classes=model.output.get_shape()[-1])
        loss_prot = tf.summary.scalar("Loss", sem_seg_loss)
        iou_prot = tf.summary.scalar("IoU", iou)

    # Training logs the loss only, validation logs loss and IoU
    train_summaries = tf.summary.merge([loss_prot])
    valid_summaries = tf.summary.merge([loss_prot, iou_prot])
    
    # Initialize tensorflow variables (either initializes them from scratch or restores from checkpoint)
    step = tell.initialize_tf_variables().global_step
    
    # -------------------------------------------------------------------------
    # Start training
    # -------------------------------------------------------------------------

    # Symbolic tensors the model offers for plotting; evaluated together with
    # the validation loss below
    plot_elements_sym = list(model.get_plot_dict().values())
    plot_elements = list()
    # NOTE(review): plot_ranges is not used anywhere below in this function
    plot_ranges = model.get_plot_range_dict()

    try:
        while step < iterations:
            check_kill_file(workspace=workspace)
            batch_x, batch_y = training_data.GetBatch()
            
            # Summaries are indexed by step * batch_size
            i = step * batch_size
            if step % display_step == 0:
                # Validation: average the loss over 10 test batches
                mean_loss = 0.
                for j in range(10):
                    test_x, test_y = test_data.GetBatch()

                    summary, loss, _, *plot_elements = session.run([valid_summaries, sem_seg_loss, iou_op, *plot_elements_sym],
                                               feed_dict={model.X: test_x,
                                                          model.y_feed: test_y,
                                                          blur_sampling_range: 3.5})

                    summary_writer_validation.add_summary(summary, i)
                    mean_loss += loss

                    # Re-associate returned tensorflow values to plotting keys
                    plot_dict = OrderedDict(zip(list(model.get_plot_dict().keys()), plot_elements))

                    # Plot outputs and cell states over frames if specified
                    # NOTE(review): `plotter` is not defined in this function;
                    # presumably a module-level object - confirm.
                    if config.store_states and 'ConvLSTMLayer_h' in plot_dict and step % config.plot_at == 0:
                        convh = plot_dict['ConvLSTMLayer_h']
                        # First sample and first channel of every entry
                        convrh = [c[0, :, :, 0] for c in convh]
                        # Arrange the images in rows of six
                        convrh = [convrh[:6], convrh[6:12], convrh[12:18], convrh[18:24], convrh[24:]]
                        plot_args = dict(images=convrh,
                                         filename=os.path.join(workspace.get_result_dir(),
                                                               "step{}_h.png".format(step)))
                        plotter.set_plot_kwargs(plot_args)
                        plotter.plot()

                    if config.store_states and 'ConvLSTMLayer_c' in plot_dict and step % config.plot_at == 0:
                        convc = plot_dict['ConvLSTMLayer_c']
                        # First sample and first channel of every entry
                        convrc = [c[0, :, :, 0] for c in convc]
                        # Arrange the images in rows of six
                        convrc = [convrc[:6], convrc[6:12], convrc[12:18], convrc[18:24], convrc[24:]]
                        plot_args = dict(images=convrc,
                                         filename=os.path.join(workspace.get_result_dir(),
                                                               "step{}_c.png".format(step)))
                        plotter.set_plot_kwargs(plot_args)
                        plotter.plot()
                print('Validation Loss at step {}: {}'.format(i, mean_loss / 10))

            # One training step on the batch fetched at the top of the loop
            summary, loss, _ = session.run([train_summaries, sem_seg_loss, optimizer],
                                          feed_dict={model.X: batch_x,
                                                     model.y_feed: batch_y,
                                                     blur_sampling_range: 3.5})
            summary_writer_train.add_summary(summary, i)
            
            step += 1
        
        print("Training Finished!")

        # Final Eval: average the loss over 100 test batches
        mean_loss = 0.

        for j in range(100):
            test_x, test_y = test_data.GetBatch()
            summary, loss, _ = session.run([valid_summaries, sem_seg_loss, iou_op],
                                        feed_dict={model.X: test_x,
                                                   model.y_feed: test_y,
                                                   blur_sampling_range: 3.5})
            mean_loss += loss

        # Predict one more test batch and store prediction/target images
        test_x, test_y = test_data.GetBatch()
        pred = session.run(tf.argmax(model.output, 4), feed_dict={model.X: test_x})

        pred = to_color(pred)
        true = to_color(test_y)
        out = to_image(pred, true)

        for i in range(pred.shape[0]):
            imsave(tell.workspace.get_result_dir() + '/sample_{:02d}.png'.format(i), out[i,])

        print("Validation Loss {}".format(mean_loss / 100))
    except AbortRun:
        print("Aborting...")
    finally:
        # Runs on normal completion, abort and error alike
        tell.close(global_step=step)
        plotter.close()
def main(_):
    """Train the TeLL model that exposes ``lstm_layer`` and plot its states.

    Sets up normalised training/validation readers with minibatch loaders,
    creates a TeLL session, builds an image cross-entropy loss that skips the
    first 10 frames plus an L1/L2 regularisation penalty, and trains for
    ``config.n_epochs`` epochs. The validation set is scored every
    ``config.score_at`` update steps and after every epoch; input, target,
    network output, hidden states and cell states are plotted every
    ``config.plot_at`` minibatches while the workspace plot file exists.

    :param _: unused
    """
    # ------------------------------------------------------------------------------------------------------------------
    # Setup training
    # ------------------------------------------------------------------------------------------------------------------
    
    # Initialize config, parses command line and reads specified file; also supports overriding of values from cmd
    config = Config()
    
    #
    # Prepare input data
    #
    
    # Make sure datareader is reproducible
    random_seed = config.get_value('random_seed', 12345)
    np.random.seed(random_seed)  # not threadsafe, use rnd_gen object where possible
    rnd_gen = np.random.RandomState(seed=random_seed)
    
    # Number of frames that get plotted per sample
    n_timesteps = config.get_value('mnist_n_timesteps', 20)
    
    # Load datasets for trainingset
    with Timer(name="Loading Data"):
        readers = initialize_datareaders(config, required=("train", "val"))
    
    # Set Preprocessing
    trainingset = Normalize(readers["train"], apply_to=['X', 'y'])
    validationset = Normalize(readers["val"], apply_to=['X', 'y'])
    
    # Set minibatch loaders
    trainingset = DataLoader(trainingset, batchsize=2, batchsize_method='zeropad', verbose=False)
    validationset = DataLoader(validationset, batchsize=2, batchsize_method='zeropad', verbose=False)
    
    #
    # Initialize TeLL session
    #
    tell = TeLLSession(config=config, summaries=["train", "validation"], model_params={"dataset": trainingset})
    
    # Get some members from the session for easier usage
    sess = tell.tf_session
    summary_writer_train, summary_writer_validation = tell.tf_summaries["train"], tell.tf_summaries["validation"]
    model = tell.model
    workspace, config = tell.workspace, tell.config
    
    #
    # Define loss functions and update steps
    #
    print("Initializing loss calculation...")
    # The first 10 frames (axis 1) are excluded from the loss
    loss, _ = image_crossentropy(target=model.y_[:, 10:, :, :], pred=model.output[:, 10:, :, :, :],
                                 pixel_weights=model.pixel_weights[:, 10:, :, :], reduce_by='mean')
    train_summary = tf.summary.scalar("Training Loss", loss)  # create summary to add to tensorboard
    
    # Loss function for validationset (same tensor, separate summary)
    val_loss = loss
    val_loss_summary = tf.summary.scalar("Validation Loss", val_loss)  # create summary to add to tensorboard
    
    # Regularization
    reg_penalty = regularize(layers=model.get_layers(), l1=config.l1, l2=config.l2,
                             regularize_weights=True, regularize_biases=True)
    regpen_summary = tf.summary.scalar("Regularization Penalty", reg_penalty)  # create summary to add to tensorboard
    
    # Update step for weights
    update = update_step(loss + reg_penalty, config)
    
    #
    # Initialize tensorflow variables (either initializes them from scratch or restores from checkpoint)
    #
    global_step = tell.initialize_tf_variables().global_step
    
    #
    # Set up plotting
    #  (store tensors we want to plot in a dictionary for easier tensor-evaluation)
    #
    # Subplots 1-3: input, target and network output of the 1st sample for every frame
    tensors_subplot1 = OrderedDict()
    tensors_subplot2 = OrderedDict()
    tensors_subplot3 = OrderedDict()
    for frame in range(n_timesteps):
        tensors_subplot1['input_{}'.format(frame)] = model.X[0, frame, :, :]
        tensors_subplot2['target_{}'.format(frame)] = model.y_[0, frame, :, :] - 1
        tensors_subplot3['network_output_{}'.format(frame)] = tf.argmax(model.output[0, frame, :, :, :], axis=-1) - 1
    # Subplots 4-5: hidden states and cell states for each frame (again of the 1st sample and 1st lstm unit)
    tensors_subplot4 = OrderedDict()
    tensors_subplot5 = OrderedDict()
    for frame in range(len(model.lstm_layer.c)):
        tensors_subplot4['hiddenstate_{}'.format(frame)] = model.lstm_layer.h[frame][0, :, :, 0]
        tensors_subplot5['cellstate_{}'.format(frame)] = model.lstm_layer.c[frame][0, :, :, 0]
    # Create a list to store all symbolic tensors for plotting; the order here
    # must match the order in which the evaluated values are consumed below
    plotting_tensors = list(tensors_subplot1.values()) + list(tensors_subplot2.values()) + \
                       list(tensors_subplot3.values()) + list(tensors_subplot4.values()) + \
                       list(tensors_subplot5.values())
    
    #
    # Finalize graph
    #  This makes our tensorflow graph read-only and prevents further additions to the graph
    #
    sess.graph.finalize()
    if sess.graph.finalized:
        print("Graph is finalized!")
    else:
        raise ValueError("Could not finalize graph!")
    
    sys.stdout.flush()
    
    # ------------------------------------------------------------------------------------------------------------------
    # Start training
    # ------------------------------------------------------------------------------------------------------------------
    
    try:
        # Resume at the epoch implied by the restored global step
        epoch = int(global_step / trainingset.n_mbs)
        epochs = range(epoch, config.n_epochs)
        
        # Loop through epochs
        print("Starting training")
        
        for ep in epochs:
            epoch = ep
            print("Starting training epoch: {}".format(ep))
            # Initialize variables for over-all loss per epoch
            train_loss = 0
            
            # Load one minibatch at a time and perform a training step
            t_mb = Timer(verbose=True, name="Load Minibatch")
            mb_training = trainingset.batch_loader(rnd_gen=rnd_gen)
            
            #
            # Loop through minibatches
            #
            for mb_i, mb in enumerate(mb_training):
                sys.stdout.flush()
                # Print minibatch load time
                t_mb.print()
                
                # Abort if indicated by file
                check_kill_file(workspace)
                
                #
                # Calculate scores on validation set
                #
                if global_step % config.score_at == 0:
                    print("Starting scoring on validation set...")
                    evaluate_on_validation_set(validationset, global_step, sess, model, summary_writer_validation,
                                               val_loss_summary, val_loss, workspace)
                
                #
                # Perform weight updates and do plotting
                #
                if (mb_i % config.plot_at) == 0 and os.path.isfile(workspace.get_plot_file()):
                    # Perform weight update, return summary values and values for plotting
                    with Timer(verbose=True, name="Weight Update"):
                        plotting_values = []
                        train_summ, regpen_summ, _, cur_loss, *plotting_values = sess.run(
                            [train_summary, regpen_summary, update, loss, *plotting_tensors],
                            feed_dict={model.X: mb['X'], model.y_: mb['y']})
                    
                    # Add current summary values to tensorboard
                    summary_writer_train.add_summary(train_summ, global_step=global_step)
                    summary_writer_train.add_summary(regpen_summ, global_step=global_step)
                    
                    # Each subplot consumes its values from the front of
                    # plotting_values and then removes them.
                    
                    # Create and save subplot 1 (input)
                    save_subplots(images=plotting_values[:len(tensors_subplot1)],
                                  subfigtitles=list(tensors_subplot1.keys()),
                                  subplotranges=[(0, 1)] * n_timesteps, colorbar=True, automatic_positioning=True,
                                  tight_layout=True,
                                  filename=os.path.join(workspace.get_result_dir(),
                                                        "input_ep{}_mb{}.png".format(ep, mb_i)))
                    del plotting_values[:len(tensors_subplot1)]
                    
                    # Create and save subplot 2 (target)
                    # One (0, 10) range per frame; the closing bracket used to
                    # sit after the multiplication, which produced a single
                    # long tuple instead of one range per subplot.
                    save_subplots(images=plotting_values[:len(tensors_subplot2)],
                                  subfigtitles=list(tensors_subplot2.keys()),
                                  subplotranges=[(0, 10)] * n_timesteps, colorbar=True, automatic_positioning=True,
                                  tight_layout=True,
                                  filename=os.path.join(workspace.get_result_dir(),
                                                        "target_ep{}_mb{}.png".format(ep, mb_i)))
                    del plotting_values[:len(tensors_subplot2)]
                    
                    # Create and save subplot 3 (output)
                    save_subplots(images=plotting_values[:len(tensors_subplot3)],
                                  subfigtitles=list(tensors_subplot3.keys()),
                                  # subplotranges=[(0, 10)] * n_timesteps,
                                  colorbar=True, automatic_positioning=True,
                                  tight_layout=True,
                                  filename=os.path.join(workspace.get_result_dir(),
                                                        "output_ep{}_mb{}.png".format(ep, mb_i)))
                    del plotting_values[:len(tensors_subplot3)]
                    
                    # Create and save subplot 4 (hidden states, i.e. ConvLSTM outputs)
                    save_subplots(images=plotting_values[:len(tensors_subplot4)],
                                  subfigtitles=list(tensors_subplot4.keys()),
                                  title='ConvLSTM hidden states (outputs)', colorbar=True, automatic_positioning=True,
                                  tight_layout=True,
                                  filename=os.path.join(workspace.get_result_dir(),
                                                        "hidden_ep{}_mb{}.png".format(ep, mb_i)))
                    del plotting_values[:len(tensors_subplot4)]
                    
                    # Create and save subplot 5 (cell states)
                    save_subplots(images=plotting_values[:len(tensors_subplot5)],
                                  subfigtitles=list(tensors_subplot5.keys()),
                                  title='ConvLSTM cell states', colorbar=True, automatic_positioning=True,
                                  tight_layout=True,
                                  filename=os.path.join(workspace.get_result_dir(),
                                                        "cell_ep{}_mb{}.png".format(ep, mb_i)))
                    del plotting_values[:len(tensors_subplot5)]
                
                else:
                    #
                    # Perform weight update without plotting
                    #
                    with Timer(verbose=True, name="Weight Update"):
                        train_summ, regpen_summ, _, cur_loss = sess.run([
                            train_summary, regpen_summary, update, loss],
                            feed_dict={model.X: mb['X'], model.y_: mb['y']})
                    
                    # Add current summary values to tensorboard
                    summary_writer_train.add_summary(train_summ, global_step=global_step)
                    summary_writer_train.add_summary(regpen_summ, global_step=global_step)
                
                # Add current loss to running average loss
                train_loss += cur_loss
                
                # Print some status info
                print("ep {} mb {} loss {} (avg. loss {})".format(ep, mb_i, cur_loss, train_loss / (mb_i + 1)))
                
                # Reset timer
                t_mb = Timer(name="Load Minibatch")
                
                # Free the memory allocated for the minibatch data
                mb.clear()
                del mb
                
                global_step += 1
            
            #
            # Calculate scores on validation set
            #
            
            # Perform scoring on validation set
            print("Starting scoring on validation set...")
            evaluate_on_validation_set(validationset, global_step, sess, model, summary_writer_validation,
                                       val_loss_summary, val_loss, workspace)
            
            # Save the model
            tell.save_checkpoint(global_step=global_step)
            
            # Abort if indicated by file
            check_kill_file(workspace)
    
    except AbortRun:
        print("Detected kill file, aborting...")
    
    finally:
        #
        # If the program executed correctly or an error was raised, close the data readers and save the model and exit
        #
        trainingset.close()
        validationset.close()
        tell.close(save_checkpoint=True, global_step=global_step)
# Example #8
# 0
def main(_):
    """Train the model provided by the TeLL session on MNIST.

    Builds a softmax cross-entropy cost (optionally minus a weighted entropy
    term, weight taken from the ``entropy_w`` config value), an accuracy
    metric and the merged summaries, then iterates until ``iterations`` steps
    have been performed. Every ``display_step``-th step is used for scoring
    on the first 2048 validation images instead of a weight update; all other
    steps run the optimizer on a training minibatch. Finally the accuracy on
    the first 2048 test images is printed.

    :param _: unused positional argument
    """
    config = Config()
    # One summary writer per data split
    tell = TeLLSession(config=config, summaries=["train", "validation"])

    # Pull the members we need out of the session
    session = tell.tf_session
    train_writer = tell.tf_summaries["train"]
    valid_writer = tell.tf_summaries["validation"]
    model = tell.model
    workspace, config = tell.workspace, tell.config

    # Hyper-parameters (learning_rate is read but not used in this function)
    learning_rate = config.get_value("learning_rate", 1e-3)
    iterations = config.get_value("iterations", 1000)
    batchsize = config.get_value("batchsize", 250)
    display_step = config.get_value("display_step", 10)
    dropout = config.get_value("dropout_prob", 0.25)

    #
    # Load Data
    #
    with Timer(name="Load data"):
        mnist = input_data.read_data_sets("../MNIST_data", one_hot=True)

    # Cost, entropy term and optimizer
    with tf.name_scope("Cost"):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
            logits=model.output, labels=model.y_)
        cost = tf.reduce_mean(cross_entropy)

        # Mean Shannon entropy of the predicted distribution; the
        # probabilities are clipped before the log to avoid log(0)
        probs = tf.nn.softmax(logits=model.output)
        clipped_log_probs = tf.log(tf.maximum(probs, 1e-15))
        sample_entropy = -tf.reduce_sum(clipped_log_probs * probs, 1)
        entropy = tf.reduce_mean(sample_entropy)

        minimize = tell.tf_optimizer.minimize
        entropy_weight = config.get_value("entropy_w", 0.)
        optimizer = minimize(cost - entropy_weight * entropy)

        tf.summary.scalar("Loss", cost)
        # Mean probability assigned to the labelled class
        label_probs = tf.nn.softmax(logits=model.output) * model.y_
        mean_label_prob = tf.reduce_mean(tf.reduce_sum(label_probs, 1))
        tf.summary.scalar("O-Prob", mean_label_prob)

    # Accuracy metric
    with tf.name_scope("Accuracy"):
        predicted_class = tf.argmax(model.output, 1)
        labelled_class = tf.argmax(model.y_, 1)
        correct_pred = tf.equal(predicted_class, labelled_class)
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        tf.summary.scalar("Accuracy", accuracy)

    merged_summaries = tf.summary.merge_all()

    # Initialize tensorflow variables (from scratch or from a checkpoint)
    init_state = tell.initialize_tf_variables(reset_optimizer_on_restore=True)
    step = init_state.global_step

    # -------------------------------------------------------------------------
    # Start training
    # -------------------------------------------------------------------------
    acc_train = 0.
    val_acc_best = 0.
    try:
        while step < iterations:
            check_kill_file(workspace=workspace)
            batch_x, batch_y = mnist.train.next_batch(batchsize)

            # Summaries are indexed by step * batchsize
            i = step * batchsize
            if step % display_step != 0:
                # Regular step: weight update on the training minibatch
                train_feed = {model.X: batch_x, model.y_: batch_y,
                              model.dropout: dropout}
                summary, acc_train, _ = session.run(
                    [merged_summaries, accuracy, optimizer],
                    feed_dict=train_feed)
                train_writer.add_summary(summary, i)
            else:
                # Display step: score on validation data, dropout fed as 0
                valid_feed = {model.X: mnist.validation.images[:2048],
                              model.y_: mnist.validation.labels[:2048],
                              model.dropout: 0}
                summary, acc = session.run([merged_summaries, accuracy],
                                           feed_dict=valid_feed)
                valid_writer.add_summary(summary, i)
                print('step {}: train acc {}, valid acc {}'.format(i, acc_train, acc))

                val_acc_best = max(val_acc_best, acc)

            step += 1

        print("Training Finished! best valid acc {}".format(val_acc_best))

        # Final Eval
        test_feed = {model.X: mnist.test.images[:2048],
                     model.y_: mnist.test.labels[:2048],
                     model.dropout: 0}
        test_accuracy = session.run(accuracy, feed_dict=test_feed)
        print("Test Accuracy:", test_accuracy)
    except AbortRun:
        print("Aborting...")
    finally:
        tell.close(global_step=step)
Beispiel #9
0
                      working_dir=working_dir,
                      config=config,
                      plotting=dict(
                          save_subplots=save_subplots,
                          save_movie=save_movie,
                          save_subplots_line_plots=save_subplots_line_plots),
                      rnd_gen=rnd_gen)


if __name__ == '__main__':
    # Script entry point: create a timestamped working directory, mirror
    # stdout into <working_dir>/log.txt and start training.
    config = Config()
    working_dir = os.path.join(config.working_dir, config.specs)
    working_dir = os.path.join(working_dir,
                               dt.datetime.now().strftime("%Y-%m-%dT%H-%M-%S"))
    make_sure_path_exists(working_dir)

    # Remember the real stdout so it can be restored before the log file is
    # closed; otherwise sys.stdout would keep pointing at a Tee that wraps a
    # closed file object.
    original_stdout = sys.stdout
    with open(os.path.join(working_dir, 'log.txt'), 'a') as logfile:
        sys.stdout = Tee(sys.stdout, logfile, sys.stdout)
        try:
            bl_config = config.get_value('bl_config')

            logger.configure(os.path.join(working_dir, 'baselines'),
                             ['tensorboard', 'log', 'stdout'])
            train(env_id=bl_config['env'],
                  num_timesteps=bl_config['num_timesteps'],
                  policy=config.get_value('policy'),
                  working_dir=working_dir,
                  config=config)
        finally:
            # Flush while the log file is still open (also on errors), then
            # detach the tee again.
            try:
                sys.stdout.flush()
            finally:
                sys.stdout = original_stdout
Beispiel #10
0
def main(_):
    """Run the training loop for the model configured through TeLL.

    Sets up the random number generators, the training data loader and the
    TeLL session, then loops over epochs and minibatches. For every minibatch
    a synthetic all-ones sample of shape (1, random length in [20, 100), 300)
    is fed to the model (the loaded minibatch itself is currently not used),
    the ``ae_update`` operation is executed and the fetched values are stored
    in an ordered dictionary keyed by tensor name. A checkpoint is written
    after every epoch and once more when the function exits.

    :param _: unused positional argument
    """
    # ------------------------------------------------------------------------------------------------------------------
    # Setup training
    # ------------------------------------------------------------------------------------------------------------------

    # Initialize config, parses command line and reads specified file; also supports overriding of values from cmd
    config = Config()

    random_seed = config.get_value('random_seed', 12345)
    np.random.seed(random_seed)  # not threadsafe, use rnd_gen object where possible
    rnd_gen = np.random.RandomState(seed=random_seed)

    # Load datasets for trainingset (no validation loader is created here)
    with Timer(name="Loading Data"):
        readers = initialize_datareaders(config, required=("train", "val"))
        trainingset = DataLoader(readers["train"], batchsize=config.batchsize)

    # Initialize TeLL session
    tell = TeLLSession(config=config, model_params={"input_shape": [300]})

    # Get some members from the session for easier usage
    session = tell.tf_session

    model = tell.model
    workspace, config = tell.workspace, tell.config

    # Initialize Tensorflow variables
    global_step = tell.initialize_tf_variables().global_step

    sys.stdout.flush()

    # ------------------------------------------------------------------------------------------------------------------
    # Start training
    # ------------------------------------------------------------------------------------------------------------------

    try:
        # Resume at the epoch implied by the restored global step
        epoch = int(global_step / trainingset.n_mbs)
        epochs = range(epoch, config.n_epochs)

        #
        # Loop through epochs
        #
        print("Starting training")

        for ep in epochs:
            print("Starting training epoch: {}".format(ep))
            # Initialize variables for over-all loss per epoch
            train_loss = 0

            # Load one minibatch at a time and perform a training step
            t_mb = Timer(name="Load Minibatch")
            mb_training = trainingset.batch_loader(rnd_gen=rnd_gen)

            #
            # Loop through minibatches
            #
            for mb_i, mb in enumerate(mb_training):
                sys.stdout.flush()
                # Print minibatch load time
                t_mb.print()

                # Abort if indicated by file
                check_kill_file(workspace)

                #
                # Calculate scores on validation set
                # (only announced here; no scoring is performed)
                #
                if global_step % config.score_at == 0:
                    print("Starting scoring on validation set...")

                # Get new sample: synthetic all-ones input with a random
                # sequence length drawn from the global numpy RNG
                training_sample = np.ones(shape=(1, np.random.randint(low=20, high=100), 300))
                #
                # Perform weight update
                #
                with Timer(name="Weight Update"):

                    #
                    # Set placeholder values
                    #
                    placeholder_values = OrderedDict(
                        input_placeholder=training_sample,
                        sequence_length_placeholder=training_sample.shape[1]
                    )
                    feed_dict = dict(((model.placeholders[k], placeholder_values[k]) for k in placeholder_values.keys()))

                    #
                    # Decide which tensors to compute
                    #
                    data_keys = ['lstm_internals_enc', 'lstm_internals_dec', 'lstm_h_enc',
                                 'lstm_h_dec', 'loss', 'loss_last_time_prediction', 'reg_loss']
                    data_tensors = [model.data_tensors[k] for k in data_keys]

                    operation_keys = ['ae_update']
                    operation_tensors = [model.operation_tensors[k] for k in operation_keys]

                    summary_keys = ['all_summaries']
                    summary_tensors = [model.summaries[k] for k in summary_keys]

                    #
                    # Run graph and re-associate return values with keys in dictionary
                    #
                    ret = session.run(data_tensors + summary_tensors + operation_tensors, feed_dict)

                    # NOTE(review): the results of these two extra runs are discarded; they look like
                    # debugging leftovers and cost additional forward passes - confirm before removing.
                    session.run(model.data_tensors['loss'], feed_dict)
                    session.run(model.data_tensors['latent_space'], feed_dict)

                    # The fetch order was data tensors, then summaries, then operations. Pair the keys
                    # with the values in that same order; zip stops after the summary keys, so the
                    # results of the update operations are dropped.
                    ret_dict = OrderedDict(zip(data_keys + summary_keys, ret))

                # Status printing, timer reset and minibatch clean-up are
                # currently disabled in this loop.

                global_step += 1

            #
            # Calculate scores on validation set after training is done
            # (only announced here; no scoring is performed)
            #
            print("Starting scoring on validation set...")

            tell.save_checkpoint(global_step=global_step)

            # Abort if indicated by file
            check_kill_file(workspace)

    except AbortRun:
        print("Detected kill file, aborting...")

    finally:
        tell.close(save_checkpoint=True, global_step=global_step)
Beispiel #11
0
    def __init__(self,
                 config: Config = None,
                 summaries: list = None,
                 model_params=None):
        """
        Take care of initializing a TeLL environment.
            Creates working directory, instantiates network architecture, configures tensorflow and tensorboard.
            Furthermore takes care of resuming runs from an existing workspace.

        :param config: Config
            config object or None; if None config will be initialized from command line parameter
        :param summaries: list
            List of names for summary writers; if None, one writer named "training" is opened
        :param model_params:
            Optional dictionary of parameters unpacked and passed to model upon initialization if not None

        The following members are exposed on the instance:

        tf_session: Tensorflow session

        tf_saver: Tensorflow checkpoint saver

        tf_summaries: dictionary containing tensorflow summary writers, accessible via the names passed upon creation

        model: TeLL model

        global_step: current global step, set to 0 here

        workspace: TeLL workspace instance

        config: TeLL config object
        """
        if config is None:
            config = Config()
        # A None sentinel is used instead of a list literal as default so that
        # no mutable object is shared between calls
        if summaries is None:
            summaries = ["training"]

        # Setup working dir
        workspace = Workspace(config.working_dir, config.specs, config.restore)
        print("TeLL workspace: {}".format(workspace.working_dir))
        # Import configured architecture
        architecture = config.import_architecture()
        # Set GPU
        os.environ["CUDA_VISIBLE_DEVICES"] = str(
            config.get_value("cuda_gpu", "0"))
        # Some Tensorflow configuration
        tf_config = tf.ConfigProto(
            inter_op_parallelism_threads=config.get_value(
                "inter_op_parallelism_threads", 1),
            intra_op_parallelism_threads=config.get_value(
                "intra_op_parallelism_threads", 1),
            log_device_placement=config.get_value("log_device_placement",
                                                  False))
        tf_config.gpu_options.allow_growth = config.get_value(
            "tf_allow_growth", True)
        # Start Tensorflow session
        print("Starting session...")
        tf_session = tf.Session(config=tf_config)
        # Set tensorflow random seed
        set_seed(config.get_value("random_seed", 12345))
        #
        # Init Tensorboard: one FileWriter per requested summary name, each
        # writing to its own sub-directory of the tensorboard directory
        #
        print("Initializing summaries...")
        summary_instances = {}
        for summary in summaries:
            summary_instances[summary] = tf.summary.FileWriter(
                os.path.join(workspace.get_tensorboard_dir(), summary),
                tf_session.graph)
        # Initialize Model
        if model_params is None:
            model = architecture(config=config)
        else:
            model = architecture(config=config, **model_params)

        # Print number of trainable parameters (sum over the element counts
        # of all trainable variables)
        trainable_vars = np.sum(
            [np.prod(t.get_shape()) for t in tf.trainable_variables()])
        print("Number of trainable variables: {}".format(trainable_vars))

        with tf.name_scope("TeLL") as tell_namescope:
            # Store global step in checkpoint
            tf_global_step = tf.Variable(
                initial_value=tf.constant(0, dtype=tf.int64),
                name="tell_global_step",
                dtype=tf.int64,
                trainable=False)
            # Define placeholder and operation to dynamically update tf_global_step with a python integer
            global_step_placeholder = tf.placeholder_with_default(
                tf_global_step, shape=tf_global_step.get_shape())
            set_global_step = tf_global_step.assign(global_step_placeholder)

        #
        # Add ops to save and restore all the variables
        #
        tf_saver = tf.train.Saver(
            max_to_keep=config.get_value("max_checkpoints", 10),
            sharded=False)
        # Expose members
        self.tf_session = tf_session
        self.tf_saver = tf_saver
        self.tf_summaries = summary_instances
        self.model = model
        self.workspace = workspace
        self.config = config
        self.global_step = 0
        self.__global_step_placeholder = global_step_placeholder
        self.__global_step_update = set_global_step
        self.__tell_namescope = tell_namescope
Beispiel #12
0
def main(_):
    """Train the configured TeLL model on generated ShortLongDataset data.

    Creates a training and a validation ShortLongDataset, builds a weighted
    cross-entropy loss (positive weight = -sum(y - 1) / sum(y)) for both
    sets plus a regularization penalty, and runs the epoch/minibatch loop.
    The validation set is scored every ``config.score_at`` steps and after
    each epoch; a checkpoint is written after each epoch and on exit.

    :param _: unused positional argument
    """
    # ------------------------------------------------------------------------------------------------------------------
    # Setup training
    # ------------------------------------------------------------------------------------------------------------------

    # Config parses the command line and the specified config file
    config = Config()

    with Timer(name="Loading Training Data"):
        # Seed both the global numpy RNG and a dedicated generator so the
        # data readers are reproducible
        random_seed = config.get_value('random_seed', 12345)
        np.random.seed(random_seed)  # not threadsafe, use rnd_gen object where possible
        rnd_gen = np.random.RandomState(seed=random_seed)

        print("Loading training data...")
        trainingset = ShortLongDataset(
            n_timesteps=250, n_samples=3000,
            batchsize=config.batchsize, rnd_gen=rnd_gen)

        # Validation data comes from the same generator
        validationset = ShortLongDataset(
            n_timesteps=250, n_samples=300,
            batchsize=config.batchsize, rnd_gen=rnd_gen)

    # TeLL session with a single summary writer
    tell = TeLLSession(config=config,
                       summaries=["train"],
                       model_params={"dataset": trainingset})

    # Shortcuts to session members
    session = tell.tf_session
    summary_writer = tell.tf_summaries["train"]
    model = tell.model
    workspace, config = tell.workspace, tell.config

    def weighted_loss():
        """Create the ops for the mean weighted cross-entropy loss."""
        negated_sum = -tf.reduce_sum(model.y_ - 1)
        label_sum = tf.reduce_sum(model.y_)
        pos_weight = negated_sum / label_sum
        per_element = tf.nn.weighted_cross_entropy_with_logits(
            model.y_, model.output, pos_weight)
        return tf.reduce_mean(tf.reduce_mean(per_element, axis=[1]))

    # Loss for the training set, also sent to tensorboard
    print("Initializing loss calculation...")
    loss = weighted_loss()
    train_summary = tf.summary.scalar("Training Loss", loss)

    # Same loss definition for the validation set, with its own summary
    val_loss = weighted_loss()
    val_loss_summary = tf.summary.scalar("Validation Loss", val_loss)

    # Regularization penalty on weights and biases, also sent to tensorboard
    reg_penalty = regularize(layers=model.get_layers(),
                             l1=config.l1,
                             l2=config.l2,
                             regularize_weights=True,
                             regularize_biases=True)
    regpen_summary = tf.summary.scalar("Regularization Penalty", reg_penalty)

    # Weight update on loss plus penalty
    update = update_step(loss + reg_penalty, config)

    # Initialize Tensorflow variables
    global_step = tell.initialize_tf_variables().global_step

    sys.stdout.flush()

    def score_validation(step):
        """Evaluate the validation set at the given global step."""
        evaluate_on_validation_set(validationset, step, session, model,
                                   summary_writer, val_loss_summary,
                                   val_loss, workspace)

    # ------------------------------------------------------------------------------------------------------------------
    # Start training
    # ------------------------------------------------------------------------------------------------------------------

    try:
        # Continue at the epoch implied by the current global step
        epoch = int(global_step / trainingset.n_mbs)
        epochs = range(epoch, config.n_epochs)

        print("Starting training")

        for ep in epochs:
            print("Starting training epoch: {}".format(ep))
            # Accumulated loss of the current epoch
            train_loss = 0

            # Timer for the minibatch loading time
            t_mb = Timer(name="Load Minibatch")
            mb_training = trainingset.batch_loader(rnd_gen=rnd_gen)

            for mb_i, minibatch in enumerate(mb_training):
                sys.stdout.flush()
                # Report how long loading the minibatch took
                t_mb.print()

                # Abort if indicated by file
                check_kill_file(workspace)

                # Periodic scoring on the validation set
                if global_step % config.score_at == 0:
                    print("Starting scoring on validation set...")
                    score_validation(global_step)

                # Weight update
                with Timer(name="Weight Update"):
                    fetches = [train_summary, regpen_summary, update, loss]
                    inputs = {model.X: minibatch['X'],
                              model.y_: minibatch['y']}
                    train_summ, regpen_summ, _, cur_loss = session.run(
                        fetches, feed_dict=inputs)

                # Send the summaries of this step to tensorboard
                for summ in (train_summ, regpen_summ):
                    summary_writer.add_summary(summ, global_step=global_step)

                # Keep the running sum for the average loss
                train_loss += cur_loss

                # Status line
                avg_loss = train_loss / (mb_i + 1)
                print("ep {} mb {} loss {} (avg. loss {})".format(
                    ep, mb_i, cur_loss, avg_loss))

                # Restart the load timer for the next minibatch
                t_mb = Timer(name="Load Minibatch")

                # Release the minibatch data
                minibatch.clear()
                del minibatch

                global_step += 1

            # Score the validation set at the end of the epoch
            print("Starting scoring on validation set...")
            score_validation(global_step)

            tell.save_checkpoint(global_step=global_step)

            # Abort if indicated by file
            check_kill_file(workspace)

    except AbortRun:
        print("Detected kill file, aborting...")

    finally:
        tell.close(save_checkpoint=True, global_step=global_step)
Beispiel #13
0
    def __init__(self, config: Config, dataset):
        """Build a multi-scale dilated convolution network.

        Six convolution layers with dilation rates 11, 9, 7, 5, 3 and 1 are
        applied in parallel to the input layer, their outputs are
        concatenated and reduced by a 1x1 convolution, followed by three
        further convolutions and an output convolution with 11 feature maps
        and identity activation.

        :param config: Config
            provides kernel sizes (``kernel_conv``, ``kernel_conv_out``) and
            the optional values read via ``get_value`` below
        :param dataset:
            provides ``X_shape`` and ``y_shape`` (and ``y_org_shape`` if
            ``include_org_label`` is set) for the placeholders

        Exposes ``self.X``, ``self.y_`` and ``self.output``.
        """
        # Config values; depth and include_org_label do not influence the
        # layers built below
        depth = config.get_value("enc_dec_depth", 2)
        n_feature_maps = config.get_value("enc_dec_conv_maps_base", 32)
        include_org_label = config.get_value("include_org_label", False)
        init_name = config.get_value("conv_W_initializer",
                                     "weight_xavier_conv2d")
        conv_W_initializer = getattr(TeLL.initializations, init_name)
        # Switch between one kernel shared by all dilated convolutions and
        # one kernel per dilation rate
        shared = False
        dilation_rates = (11, 9, 7, 5, 3, 1)

        layers = []

        #
        # Placeholders for the inputs and the labels
        #
        # dataset.X_shape is [sample, seq_pos, x, y, features)
        n_seq_pos = dataset.X_shape[1]
        X = tf.placeholder(tf.float32, shape=dataset.X_shape)
        y_ = tf.placeholder(tf.int32, shape=dataset.y_shape)

        if include_org_label:
            y_org = tf.placeholder(tf.int32, shape=dataset.y_org_shape)

        #
        # Input layer: zero tensor with the sequence dimension set to 1
        #
        input_shape = dataset.X_shape[:1] + (1, ) + dataset.X_shape[2:]
        layers.append(RNNInputLayer(tf.zeros(input_shape, dtype=tf.float32)))

        #
        # Parallel dilated convolutions, all reading from the input layer
        #
        kernel_shape = [
            config.kernel_conv, config.kernel_conv,
            layers[-1].get_output_shape()[-1], n_feature_maps
        ]
        shared_kernel = conv_W_initializer(kernel_shape) if shared else None
        for rate in dilation_rates:
            if shared:
                kernel = shared_kernel
            else:
                kernel = conv_W_initializer(kernel_shape)
            layers.append(
                ConvLayer(incoming=layers[0],
                          W=kernel,
                          a=tf.nn.elu,
                          dilation_rate=[rate, rate]))

        # Concatenate the feature maps of all dilation rates and reduce the
        # number of features with a 1x1 convolution
        layers.append(ConcatLayer(incomings=layers[1:]))
        kernel_shape = [
            1, 1, layers[-1].get_output_shape()[-1], n_feature_maps
        ]
        layers.append(
            ConvLayer(incoming=layers[-1], W=conv_W_initializer(kernel_shape)))

        # Three more convolutions to add depth; the kernel shape is derived
        # once from the 1x1 convolution output and reused for all three
        kernel_shape = [
            config.kernel_conv, config.kernel_conv,
            layers[-1].get_output_shape()[-1], n_feature_maps
        ]
        for _ in range(3):
            layers.append(
                ConvLayer(incoming=layers[-1],
                          W=conv_W_initializer(kernel_shape)))

        #
        # Output layer: 11 feature maps, identity activation
        #
        previous = layers[-1]
        output_kernel = conv_W_initializer([
            config.kernel_conv_out, config.kernel_conv_out,
            previous.get_output_shape()[-1], 11
        ])
        sem_seg_layer = ConvLayer(incoming=previous,
                                  W=output_kernel,
                                  padding='SAME',
                                  name='ConvLayerSemanticSegmentation',
                                  a=tf.identity)
        layers.append(sem_seg_layer)

        # Publish placeholders and network output
        self.X = X
        self.y_ = y_
        self.output = sem_seg_layer.get_output()
Beispiel #14
0
    def __init__(self, config: Config, dataset):
        """Architecture for semantic segmentation as described in presentation using standard for loop."""
        depth = config.get_value("enc_dec_depth", 2)
        basenr_convs = config.get_value("enc_dec_conv_maps_base", 16)
        include_org_label = config.get_value("include_org_label", False)
        init_name = config.get_value("conv_W_initializer",
                                     "weight_xavier_conv2d")
        conv_W_initializer = getattr(TeLL.initializations, init_name)

        #
        # Layer list
        #
        layers = list()

        #
        # Create placeholders for feeding an input frame and a label at the first timestep
        #
        n_seq_pos = dataset.X_shape[
            1]  # dataset.X_shape is [sample, seq_pos, x, y, features)
        X = tf.placeholder(tf.float32, shape=dataset.X_shape)
        y_ = tf.placeholder(tf.int32, shape=dataset.y_shape)

        if include_org_label:
            y_org = tf.placeholder(tf.int32, shape=dataset.y_org_shape)

        # ----------------------------------------------------------------------------------------------------------
        # Define network architecture
        # ----------------------------------------------------------------------------------------------------------
        # initializer for weight values of kernels
        # conv_W_initializer = weight_xavier_conv2d

        #
        # Initialize input to network of shape [sample, 1, x, y, features] with zero tensor of size of a frame
        #
        input_shape = dataset.X_shape[:1] + (1, ) + dataset.X_shape[2:]
        layers.append(RNNInputLayer(tf.zeros(input_shape, dtype=tf.float32)))
        rnn_input_layer = layers[-1]

        #
        # Encoder and maxpooling layers
        #
        encoders = list()
        for d in range(1, depth + 1):
            print("\tConvLayerEncoder{}...".format(d))
            layers.append(
                ConvLayer(incoming=layers[-1],
                          W=conv_W_initializer([
                              config.kernel_conv, config.kernel_conv,
                              layers[-1].get_output_shape()[-1],
                              basenr_convs * (2**d)
                          ]),
                          padding='SAME',
                          name='ConvLayerEncoder{}'.format(d),
                          a=tf.nn.elu))
            encoders.append(layers[-1])
            print("\tMaxpoolingLayer{}...".format(d))
            layers.append(
                MaxPoolingLayer(incoming=layers[-1],
                                ksize=(1, 3, 3, 1),
                                strides=(1, 2, 2, 1),
                                padding='SAME',
                                name='MaxpoolingLayer{}'.format(d)))

        #
        # ConvLSTM Layer
        #
        if config.n_lstm:
            n_lstm = config.n_lstm
            lstm_x_fwd = config.kernel_lstm_fwd
            lstm_y_fwd = config.kernel_lstm_fwd
            lstm_x_bwd = config.kernel_lstm_bwd
            lstm_y_bwd = config.kernel_lstm_bwd

            lstm_input_channels_fwd = layers[-1].get_output_shape()[-1]
            if config.reduced_rec_lstm:
                lstm_input_channels_bwd = config.reduced_rec_lstm
            else:
                lstm_input_channels_bwd = n_lstm

            lstm_init = dict(W_ci=[
                conv_W_initializer(
                    [lstm_x_fwd, lstm_y_fwd, lstm_input_channels_fwd, n_lstm]),
                conv_W_initializer(
                    [lstm_x_bwd, lstm_y_bwd, lstm_input_channels_bwd, n_lstm])
            ],
                             W_ig=[
                                 conv_W_initializer([
                                     lstm_x_fwd, lstm_y_fwd,
                                     lstm_input_channels_fwd, n_lstm
                                 ]),
                                 conv_W_initializer([
                                     lstm_x_bwd, lstm_y_bwd,
                                     lstm_input_channels_bwd, n_lstm
                                 ])
                             ],
                             W_og=[
                                 conv_W_initializer([
                                     lstm_x_fwd, lstm_y_fwd,
                                     lstm_input_channels_fwd, n_lstm
                                 ]),
                                 conv_W_initializer([
                                     lstm_x_bwd, lstm_y_bwd,
                                     lstm_input_channels_bwd, n_lstm
                                 ])
                             ],
                             W_fg=[
                                 conv_W_initializer([
                                     lstm_x_fwd, lstm_y_fwd,
                                     lstm_input_channels_fwd, n_lstm
                                 ]),
                                 conv_W_initializer([
                                     lstm_x_bwd, lstm_y_bwd,
                                     lstm_input_channels_bwd, n_lstm
                                 ])
                             ],
                             b_ci=constant([n_lstm]),
                             b_ig=constant([n_lstm]),
                             b_og=constant([n_lstm]),
                             b_fg=constant([n_lstm], 1))

            print("\tConvLSTM...")
            layers.append(
                ConvLSTMLayer(incoming=layers[-1],
                              n_units=n_lstm,
                              **lstm_init,
                              a_out=get_rec_attr(tf, config.lstm_act),
                              forgetgate=config.forgetgate,
                              comb=config.lstm_comb,
                              store_states=config.store_states,
                              tickerstep_biases=tf.zeros,
                              output_dropout=config.lstm_output_dropout,
                              precomp_fwds=False))
            lstm_layer = layers[-1]

            #
            # Optional maxpooling and upscaling of rec LSTM connections combined with/or optional feature squashing
            #
            ext_lstm_recurrence = None
            if config.lstm_rec_maxpooling:
                print("\tMaxpoolingDeconv...")
                layers.append(
                    MaxPoolingLayer(incoming=layers[-1],
                                    ksize=(1, 3, 3, 1),
                                    strides=(1, 2, 2, 1),
                                    padding='SAME',
                                    name='MaxPoolingLayer'))
                layers.append(
                    DeConvLayer(incoming=layers[-1],
                                a=tf.nn.elu,
                                W=conv_W_initializer([
                                    3, 3, layers[-1].get_output_shape()[-1],
                                    layers[-1].get_output_shape()[-1]
                                ]),
                                strides=(1, 2, 2, 1),
                                padding='SAME',
                                name='DeConvLayer'))
                print("\tConvLSTMRecurrence...")
                ext_lstm_recurrence = layers[-1]

            if config.reduced_rec_lstm:
                print("\tFeatureSquashing...")
                layers.append(
                    ConvLayer(incoming=layers[-1],
                              W=conv_W_initializer([
                                  config.kernel_conv_out,
                                  config.kernel_conv_out,
                                  layers[-1].get_output_shape()[-1],
                                  config.reduced_rec_lstm
                              ]),
                              padding='SAME',
                              name='ConvLayerFeatureSquashing',
                              a=tf.nn.elu))
                print("\tConvLSTMRecurrence...")
                ext_lstm_recurrence = layers[-1]

            if ext_lstm_recurrence is not None:
                lstm_layer.add_external_recurrence(ext_lstm_recurrence)
        else:
            print("\tSubstituteConvLayer...")
            n_lstm = basenr_convs * (2**depth) * 4
            layers.append(
                ConvLayer(incoming=layers[-1],
                          W=conv_W_initializer([
                              config.kernel_conv, config.kernel_conv,
                              layers[-1].get_output_shape()[-1],
                              int(basenr_convs * (2**depth) * 4.5)
                          ]),
                          padding='SAME',
                          name='SubstituteConvLayer',
                          a=tf.nn.elu))
            lstm_layer = layers[-1]

        #
        # Decoder and upscaling layers
        #
        for d in list(range(1, depth + 1))[::-1]:
            print("\tUpscalingLayer{}...".format(d))
            layers[-1] = ScalingLayer(
                incoming=layers[-1],
                size=encoders[d - 1].get_output_shape()[-3:-1],
                name='UpscalingLayergLayer{}'.format(d))

            print("\tConcatLayer{}...".format(d))
            layers.append(
                ConcatLayer([encoders[d - 1], layers[-1]],
                            name='ConcatLayer{}'.format(d)))

            print("\tConvLayerDecoder{}...".format(d))
            layers.append(
                ConvLayer(incoming=layers[-1],
                          W=conv_W_initializer([
                              config.kernel_conv, config.kernel_conv,
                              layers[-1].get_output_shape()[-1],
                              basenr_convs * (2**d)
                          ]),
                          padding='SAME',
                          name='ConvLayerDecoder{}'.format(d),
                          a=tf.nn.elu))

        #
        # ConvLayer for semantic segmentation
        #
        print("\tConvLayerSemanticSegmentation...")
        layers.append(
            ConvLayer(incoming=layers[-1],
                      W=conv_W_initializer([
                          config.kernel_conv_out, config.kernel_conv_out,
                          layers[-1].get_output_shape()[-1], 11
                      ]),
                      padding='SAME',
                      name='ConvLayerSemanticSegmentation',
                      a=tf.identity))
        sem_seg_layer = layers[-1]

        # ----------------------------------------------------------------------------------------------------------
        # Loop through sequence positions and create graph
        # ----------------------------------------------------------------------------------------------------------

        #
        # Loop through sequence positions
        #
        print("\tRNN Loop...")
        sem_seg_out = list()
        for seq_pos in range(n_seq_pos):
            with tf.name_scope("Sequence_pos_{}".format(seq_pos)):
                print("\t  seq. pos. {}...".format(seq_pos))
                # Set input layer to X at frame (t) and outputs of upper layers at (t-1)
                layers[0].update(X[:, seq_pos:seq_pos + 1, :])

                # Calculate new network output at (t), including new hidden states
                _ = lstm_layer.get_output()
                sem_seg_out.append(
                    sem_seg_layer.get_output(prev_layers=encoders +
                                             [lstm_layer]))

        #
        # Loop through tickersteps
        #
        # # Use empty frame as X during ticker steps (did not work so good)
        # tickerstep_input = tf.zeros(dataset.X_shape[:1] + (1,) + dataset.X_shape[2:], dtype=tf.float32,
        #                             name="tickerframe")

        # Use last frame as X during ticker steps
        #tickerstep_input = X[:, -1:, :]
        #layers[0].update(tickerstep_input)

        #for tickerstep in range(config.tickersteps):
        #    with tf.name_scope("Tickerstep_{}".format(tickerstep)):
        #        print("\t  tickerstep {}...".format(tickerstep))
        #
        #        # Calculate new network output at (t), including new hidden states
        #        _ = lstm_layer.get_output(tickerstep_nodes=True)

        #sem_seg_out = sem_seg_layer.get_output(prev_layers=encoders + [lstm_layer])

        print("\tDone!")

        #
        # Publish
        #
        self.X = X
        self.y_feed = y_
        self.y_ = y_[:, 10:]
        self.output = tf.concat(sem_seg_out[10:], 1)
        self.__layers = layers
        self.__n_lstm = n_lstm
        self.__lstm_layer = lstm_layer
        self.lstm_layer = lstm_layer
        self.__plot_dict, self.__plot_range_dict, self.__plotsink = self.__setup_plotting(
            config)
        if include_org_label:
            self.y_org = y_org
def main(_):
    """Train and evaluate an MNIST classifier inside a TeLL session.

    Reads hyper-parameters from ``Config``, builds the train/validation/test
    ``DataLoader`` pipelines, defines a softmax cross-entropy loss with two
    ``decor_penalty`` terms, and performs weight updates until ``iterations``
    steps are reached. Whenever ``step % display_step == 0`` the complete
    validation set is scored and a status line is printed. After training,
    the accuracy of every test minibatch is printed. The TeLL session is
    closed on exit, whether training finished, was aborted via ``AbortRun``
    or failed.

    Args:
        _: Unused positional argument (presumably ``argv`` handed over by the
            application runner -- confirm against the caller).
    """
    config = Config()
    # Create new TeLL session with two summary writers
    tell = TeLLSession(config=config, summaries=["train", "validation"])

    # Get some members from the session for easier usage
    session = tell.tf_session
    summary_writer_train, summary_writer_validation = tell.tf_summaries[
        "train"], tell.tf_summaries["validation"]
    model = tell.model
    workspace, config = tell.workspace, tell.config

    # Parameters
    learning_rate = config.get_value("learning_rate", 1e-3)
    iterations = config.get_value("iterations", 1000)
    batchsize = config.get_value("batchsize", 250)
    display_step = config.get_value("display_step", 10)
    dropout = config.get_value("dropout_prob", 0.25)

    #
    # Prepare input data
    #

    # Set datareaders
    training_reader = MNISTReader(dset='train')
    validation_reader = MNISTReader(dset='validation')
    test_reader = MNISTReader(dset='test')

    # Set Preprocessing
    # NOTE(review): Normalize is applied to 'X' twice (once alone, once
    # together with 'Y') -- confirm that this is intended.
    training_data_preprocessed = DataProcessing(training_reader, apply_to='X')
    training_data_preprocessed = Normalize(training_data_preprocessed,
                                           apply_to='X')
    training_data_preprocessed = Normalize(training_data_preprocessed,
                                           apply_to=['X', 'Y'])

    # Set minibatch loaders
    # NOTE(review): the loaders use a fixed batchsize of 50 instead of the
    # configured `batchsize`, and validation/test data bypass the
    # preprocessing applied to the training data -- confirm.
    training_loader = DataLoader(training_data_preprocessed,
                                 batchsize=50,
                                 batchsize_method='zeropad')
    validation_loader = DataLoader(validation_reader,
                                   batchsize=50,
                                   batchsize_method='zeropad')
    test_loader = DataLoader(test_reader,
                             batchsize=50,
                             batchsize_method='zeropad')

    #
    # Define loss and optimizer
    #
    with tf.name_scope("Cost"):
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=model.output,
                                                    labels=model.y_))
        decor1 = decor_penalty(model.hidden1, model.y_, 10, [1], 0.)
        decor2 = decor_penalty(model.hidden2, model.y_, 10, [1], 6e-5)
        optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(cost + decor1 + decor2)
        tf.summary.scalar("Loss", cost)
        tf.summary.scalar("Decor", decor1 + decor2)

    # Evaluate model
    with tf.name_scope("Accuracy"):
        correct_pred = tf.equal(tf.argmax(model.output, 1),
                                tf.argmax(model.y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
        tf.summary.scalar("Accuracy", accuracy)

    merged_summaries = tf.summary.merge_all()

    # Initialize tensorflow variables (either initializes them from scratch or restores from checkpoint)
    step = tell.initialize_tf_variables().global_step

    # -------------------------------------------------------------------------
    # Start training
    # -------------------------------------------------------------------------
    acc_train = 0.
    try:
        while step < iterations:
            # Loop through training set
            for mb_i, mb in enumerate(
                    training_loader.batch_loader(num_cached=5, num_threads=3)):
                check_kill_file(workspace=workspace)

                # Perform weight update
                summary, acc_train, _ = session.run(
                    [merged_summaries, accuracy, optimizer],
                    feed_dict={
                        model.X: mb['X'],
                        model.y_: mb['y'],
                        model.dropout: dropout
                    })
                # NOTE(review): this step index mixes the minibatch index
                # with the global step; it may not be monotonic across
                # passes over the training set -- confirm.
                summary_writer_train.add_summary(summary,
                                                 mb_i + step * batchsize)

                if step % display_step == 0:
                    # Loop through validation set with dropout disabled and
                    # average loss/accuracy over the number of minibatches
                    # (not over the last minibatch index).
                    cos_sum, acc_sum, n_val_mbs = 0., 0., 0
                    for vmb in validation_loader.batch_loader(num_cached=5,
                                                              num_threads=3):
                        cos, acc = session.run(
                            [cost, accuracy],
                            feed_dict={
                                model.X: vmb['X'],
                                model.y_: vmb['y'],
                                model.dropout: 0
                            })
                        cos_sum += cos
                        acc_sum += acc
                        n_val_mbs += 1

                    if n_val_mbs:
                        print('step {}: train acc {}, valid loss {}, '
                              'valid acc {}'.format(mb_i + step * batchsize,
                                                    acc_train,
                                                    cos_sum / n_val_mbs,
                                                    acc_sum / n_val_mbs))
                    else:
                        # Nothing to average; avoid a division by zero.
                        print('step {}: train acc {}, no validation '
                              'minibatches available'.format(
                                  mb_i + step * batchsize, acc_train))

                step += 1
                if step >= iterations:
                    break

        print("Training Finished!")

        # Final Eval: report the accuracy of each test minibatch
        for tmb in test_loader.batch_loader(num_cached=len(
                test_reader.get_sample_keys()),
                                            num_threads=1):
            print(
                "Test Accuracy:",
                session.run(accuracy,
                            feed_dict={
                                model.X: tmb['X'],
                                model.y_: tmb['y'],
                                model.dropout: 0
                            }))
    except AbortRun:
        print("Aborting...")
    finally:
        tell.close(global_step=step)
# Example #16
# 0
def main(_):
    """Train a model on the moving-dot dataset, with optional plotting.

    Creates the training and validation ``MovingDotDataset`` instances, opens
    a ``TeLLSession``, defines a weighted cross-entropy loss plus a
    regularization penalty, finalizes the graph and runs the epoch/minibatch
    training loop. The validation set is scored whenever
    ``global_step % config.score_at == 0`` and after every epoch; a
    checkpoint is saved after every epoch. Every ``config.plot_at``-th
    minibatch is additionally plotted, but only while the workspace plot file
    exists. Datasets, session and plotter are closed on exit, whether the run
    finished, was aborted via ``AbortRun`` or failed.

    NOTE(review): ``plotter`` is not defined inside this function; it is
    presumably a module-level object -- confirm.

    Args:
        _: Unused positional argument (presumably ``argv`` handed over by the
            application runner -- confirm against the caller).
    """

    # ------------------------------------------------------------------------------------------------------------------
    # Setup training
    # ------------------------------------------------------------------------------------------------------------------

    # Initialize config, parses command line and reads specified file; also supports overriding of values from cmd
    config = Config()

    #
    # Load datasets for training and validation
    #
    with Timer(name="Loading Data", verbose=True):
        # Make sure datareader is reproducible
        random_seed = config.get_value('random_seed', 12345)
        np.random.seed(
            random_seed)  # not threadsafe, use rnd_gen object where possible
        rnd_gen = np.random.RandomState(seed=random_seed)

        print("Loading training data...")
        trainingset = MovingDotDataset(n_timesteps=5,
                                       n_samples=50,
                                       batchsize=config.batchsize,
                                       rnd_gen=rnd_gen)
        print("Loading validation data...")
        validationset = MovingDotDataset(n_timesteps=5,
                                         n_samples=25,
                                         batchsize=config.batchsize,
                                         rnd_gen=rnd_gen)

    #
    # Initialize TeLL session
    #
    tell = TeLLSession(config=config,
                       summaries=["train", "validation"],
                       model_params={"dataset": trainingset})

    # Get some members from the session for easier usage
    sess = tell.tf_session
    summary_writer_train, summary_writer_validation = tell.tf_summaries[
        "train"], tell.tf_summaries["validation"]
    model = tell.model
    workspace, config = tell.workspace, tell.config

    #
    # Define loss functions and update steps
    #
    print("Initializing loss calculation...")
    pos_target_weight = np.prod(
        trainingset.y_shape[2:]
    ) - 1  # only 1 pixel per sample is of positive class -> up-weight!
    loss = tf.reduce_mean(
        tf.nn.weighted_cross_entropy_with_logits(targets=model.y_,
                                                 logits=model.output,
                                                 pos_weight=pos_target_weight))
    train_summary = tf.summary.scalar(
        "Training Loss", loss)  # create summary to add to tensorboard

    # Loss function for validationset (same definition as the training loss,
    # kept as a separate node with its own summary)
    val_loss = tf.reduce_mean(
        tf.nn.weighted_cross_entropy_with_logits(targets=model.y_,
                                                 logits=model.output,
                                                 pos_weight=pos_target_weight))
    val_loss_summary = tf.summary.scalar(
        "Validation Loss", val_loss)  # create summary to add to tensorboard

    # Regularization
    reg_penalty = regularize(layers=model.get_layers(),
                             l1=config.l1,
                             l2=config.l2,
                             regularize_weights=True,
                             regularize_biases=True)
    regpen_summary = tf.summary.scalar(
        "Regularization Penalty",
        reg_penalty)  # create summary to add to tensorboard

    # Update step for weights
    update = update_step(loss + reg_penalty, config)

    #
    # Prepare plotting
    #
    plot_elements_sym = list(model.get_plot_dict().values())
    plot_ranges = model.get_plot_range_dict()

    #
    # Initialize tensorflow variables (either initializes them from scratch or restores from checkpoint)
    #
    global_step = tell.initialize_tf_variables().global_step

    #
    # Finalize graph
    #  This makes our tensorflow graph read-only and prevents further additions to the graph
    #
    sess.graph.finalize()
    if sess.graph.finalized:
        print("Graph is finalized!")
    else:
        raise ValueError("Could not finalize graph!")

    sys.stdout.flush()

    # ------------------------------------------------------------------------------------------------------------------
    # Start training
    # ------------------------------------------------------------------------------------------------------------------

    try:
        # Resume at the epoch implied by the (possibly restored) global step
        first_epoch = int(global_step / trainingset.n_mbs)

        # Loop through epochs
        print("Starting training")

        for ep in range(first_epoch, config.n_epochs):
            print("Starting training epoch: {}".format(ep))
            # Initialize variables for over-all loss per epoch
            train_loss = 0

            # Load one minibatch at a time and perform a training step
            t_mb = Timer(verbose=True, name="Load Minibatch")
            mb_training = trainingset.batch_loader(rnd_gen=rnd_gen)

            #
            # Loop through minibatches
            #
            for mb_i, mb in enumerate(mb_training):
                sys.stdout.flush()
                # Print minibatch load time
                t_mb.print()

                # Abort if indicated by file
                check_kill_file(workspace)

                #
                # Calculate scores on validation set
                #
                if global_step % config.score_at == 0:
                    print("Starting scoring on validation set...")
                    evaluate_on_validation_set(validationset, global_step,
                                               sess, model,
                                               summary_writer_validation,
                                               val_loss_summary, val_loss,
                                               workspace)

                #
                # Perform weight updates and do plotting
                #
                if (mb_i % config.plot_at) == 0 and os.path.isfile(
                        workspace.get_plot_file()):
                    # Perform weight update, return summary_str and values for plotting
                    with Timer(verbose=True, name="Weight Update"):
                        train_summ, regpen_summ, _, cur_loss, *plot_elements = sess.run(
                            [
                                train_summary, regpen_summary, update, loss,
                                *plot_elements_sym
                            ],
                            feed_dict={
                                model.X: mb['X'],
                                model.y_: mb['y']
                            })

                    # Add current summary values to tensorboard
                    summary_writer_train.add_summary(train_summ,
                                                     global_step=global_step)
                    summary_writer_train.add_summary(regpen_summ,
                                                     global_step=global_step)

                    # Re-associate returned tensorflow values to plotting keys
                    plot_dict = OrderedDict(
                        zip(list(model.get_plot_dict().keys()), plot_elements))

                    # Index used in the state-plot filenames below; stays 0
                    # if the plot sink is empty, otherwise it ends up as the
                    # index of the last plot list.
                    plotlist_i = 0

                    #
                    # Plot subplots in plot_dict
                    # Loop through each element in plotlist and pass it to the plotter
                    # (adapt this to your needs for plotting)
                    #
                    with Timer(verbose=True, name="Plotting",
                               precision="msec"):
                        for plotlist_i, plotlist in enumerate(
                                model.get_plotsink()):
                            # Arrange the plot keys on a square grid; the
                            # layout does not depend on the frame.
                            n_cols = int(np.ceil(np.sqrt(len(plotlist))))
                            grid_rows = [
                                plotlist[n_cols * row_i:n_cols * row_i +
                                         n_cols] for row_i in range(n_cols)
                            ]

                            for frame in range(len(plot_dict[plotlist[0]])):
                                subfigtitles = list(grid_rows)
                                subplotranges = [[
                                    plot_ranges.get(p, False) for p in row
                                ] for row in grid_rows]

                                # Elements with fewer frames than the first
                                # one fall back to their frame 0; rows
                                # without images are dropped.
                                subplotlist = [[
                                    plot_dict[p][frame if frame < len(
                                        plot_dict[p]) else 0, :] for p in row
                                ] for row in grid_rows if row]

                                plot_args = dict(
                                    images=subplotlist,
                                    filename=os.path.join(
                                        workspace.get_result_dir(),
                                        "plot{}_ep{}_mb{}_fr{}.png".format(
                                            plotlist_i, ep, mb_i, frame)),
                                    subfigtitles=subfigtitles,
                                    subplotranges=subplotranges)
                                plotter.set_plot_kwargs(plot_args)
                                plotter.plot()

                    # Plot outputs ('h') and cell states ('c') over frames if specified
                    if config.store_states:
                        for state_name in ("h", "c"):
                            state_key = "ConvLSTMLayer_" + state_name
                            if state_key not in plot_dict:
                                continue
                            # Index 0 on the first and last axis of every
                            # entry (presumably first sample and first
                            # feature map -- confirm).
                            state_maps = [
                                s[0, :, :, 0] for s in plot_dict[state_key]
                            ]
                            # Rows of up to six images each; the last row
                            # takes whatever remains.
                            state_rows = [
                                state_maps[:6], state_maps[6:12],
                                state_maps[12:18], state_maps[18:24],
                                state_maps[24:]
                            ]
                            plot_args = dict(
                                images=state_rows,
                                filename=os.path.join(
                                    workspace.get_result_dir(),
                                    "plot{}_ep{}_mb{}_{}.png".format(
                                        plotlist_i, ep, mb_i, state_name)))
                            plotter.set_plot_kwargs(plot_args)
                            plotter.plot()

                else:
                    #
                    # Perform weight update without plotting
                    #
                    with Timer(verbose=True, name="Weight Update"):
                        train_summ, regpen_summ, _, cur_loss = sess.run(
                            [train_summary, regpen_summary, update, loss],
                            feed_dict={
                                model.X: mb['X'],
                                model.y_: mb['y']
                            })

                    # Add current summary values to tensorboard
                    summary_writer_train.add_summary(train_summ,
                                                     global_step=global_step)
                    summary_writer_train.add_summary(regpen_summ,
                                                     global_step=global_step)

                # Add current loss to running average loss
                train_loss += cur_loss

                # Print some status info
                print("ep {} mb {} loss {} (avg. loss {})".format(
                    ep, mb_i, cur_loss, train_loss / (mb_i + 1)))

                # Reset timer
                t_mb = Timer(name="Load Minibatch")

                # Free the memory allocated for the minibatch data
                mb.clear()
                del mb

                global_step += 1

            #
            # Calculate scores on validation set
            #

            # Perform scoring on validation set
            print("Starting scoring on validation set...")
            evaluate_on_validation_set(validationset, global_step, sess, model,
                                       summary_writer_validation,
                                       val_loss_summary, val_loss, workspace)

            # Save the model
            tell.save_checkpoint(global_step=global_step)

            # Abort if indicated by file
            check_kill_file(workspace)

    except AbortRun:
        print("Detected kill file, aborting...")

    finally:
        #
        # If the program executed correctly or an error was raised, close the data readers and save the model and exit
        #
        trainingset.close()
        validationset.close()
        tell.close(save_checkpoint=True, global_step=global_step)
        plotter.close()
# Example #17
# 0
def main(_):
    """Train the model of a TeLL session and validate it after every epoch.

    The function reads its hyperparameters from ``Config``, wraps the
    "train" and "val" readers in PyTorch ``DataLoader`` objects, builds a
    per-class sigmoid cross-entropy loss (unless the model brings its own
    ``loss()``), adds a regularization penalty and then runs the epoch loop.
    If the config defines a learning rate schedule of type ``"plateau"``, the
    learning rate is fed through a placeholder and multiplied by
    ``lrs["factor"]`` whenever the validation score has not improved for more
    than ``lrs["patience"]`` epochs. If no trainable variables remain (e.g.
    everything is frozen), a single validation pass is run instead of
    training. The session is always closed with a checkpoint on exit.

    Args:
        _: Unused positional argument (presumably the leftover argv handed
            over by the application runner -- confirm against the caller).
    """
    config = Config()
    np.random.seed(config.get_value("random_seed", 12345))

    # PARAMETERS
    n_epochs = config.get_value("epochs", 100)
    batchsize = config.get_value("batchsize", 8)
    n_classes = config.get_value("n_classes", 13)
    dropout = config.get_value("dropout", 0.25)  # TODO
    num_threads = config.get_value("num_threads", 5)
    initial_val = config.get_value("initial_val", True)

    # READER, LOADER
    readers = invoke_dataset_from_config(config)
    reader_train = readers["train"]
    reader_val = readers["val"]
    # Use the default-aware `batchsize` so the loader agrees with the epoch
    # bookkeeping below even if "batchsize" is missing from the config.
    train_loader = torch.utils.data.DataLoader(reader_train,
                                               batch_size=batchsize,
                                               shuffle=True,
                                               num_workers=num_threads)
    val_loader = torch.utils.data.DataLoader(reader_val,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=num_threads)

    # CONFIG
    tell = TeLLSession(config=config,
                       model_params={"shape": reader_train.shape})
    # Get some members from the session for easier usage
    session = tell.tf_session
    model = tell.model
    workspace, config = tell.workspace, tell.config

    prediction = tf.sigmoid(model.output)
    # Validation prediction: sigmoid outputs averaged over the batch axis
    prediction_val = tf.reduce_mean(tf.sigmoid(model.output),
                                    axis=0,
                                    keepdims=True)

    # LOSS
    if hasattr(model, "loss"):
        loss = model.loss()
    else:
        # Sum over classes of the batch-mean sigmoid cross-entropy
        with tf.name_scope("Loss_per_Class"):
            loss = 0
            for i in range(n_classes):
                loss_batch = tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=model.output[:, i], labels=model.y_[:, i])
                loss_mean = tf.reduce_mean(loss_batch)
                loss += loss_mean

    # Validation loss after patching
    if hasattr(model, "loss"):
        loss_val = model.loss()
    else:
        # Same as above, but the logits are averaged over the batch axis
        # first (presumably the batch holds the patches of one sample --
        # TODO confirm against `validate`).
        with tf.name_scope("Loss_per_Class_Patching"):
            loss_val = 0
            for i in range(n_classes):
                loss_batch = tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=tf.reduce_mean(model.output[:, i],
                                          axis=0,
                                          keepdims=True),
                    labels=model.y_[:, i])
                loss_mean = tf.reduce_mean(loss_batch)
                loss_val += loss_mean

    # REGULARIZATION
    reg_penalty = regularize(layers=model.layers,
                             l1=config.l1,
                             l2=config.l2,
                             regularize_weights=True,
                             regularize_biases=True)

    # LEARNING RATE (SCHEDULE)
    # With `learning_rate = None` the default optimizer is used with its own
    # configured learning rate. It is bound up front so that an "lrs" entry
    # of a type other than "plateau" no longer leaves the name undefined.
    learning_rate = None
    lrs_plateu = False
    if config.get_value("lrs", None) is not None:
        lr_sched_type = config.lrs["type"]
        if lr_sched_type == "plateau":
            lrs_plateu = True
            learning_rate = tf.placeholder(tf.float32, [],
                                           name='learning_rate')
            lrs_learning_rate = config.get_value(
                "optimizer_params")["learning_rate"]
            lrs_n_bad_epochs = 0  # counter for plateau LRS
            lrs_patience = config.lrs["patience"]
            lrs_factor = config.lrs["factor"]
            lrs_threshold = config.lrs["threshold"]
            lrs_mode = config.lrs["mode"]
            lrs_best = -np.inf if lrs_mode == "max" else np.inf

            def lrs_is_better(old, new):
                """Return True if `new` beats `old` by the relative threshold."""
                if lrs_mode == "max":
                    return new > old * (1 + lrs_threshold)
                return new < old * (1 - lrs_threshold)

    # LOAD WEIGHTS and get list of trainables if specified
    assign_loaded_variables = None
    trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    if config.get_value("checkpoint", None) is not None:
        with Timer(name="Loading Checkpoint", verbose=True):
            assign_loaded_variables, trainables = tell.load_weights(
                config.get_value("checkpoint", None),
                config.get_value("freeze", False),
                config.get_value("exclude_weights", None),
                config.get_value("exclude_freeze", None))

    # Update step (only defined if there is something to train)
    if len(trainables) > 0:
        update, gradients, gradient_name_dict = update_step(
            loss + reg_penalty,
            config,
            tell,
            lr=learning_rate,
            trainables=trainables)

    # INITIALIZE Tensorflow VARIABLES
    step = tell.initialize_tf_variables().global_step

    # ASSIGN LOADED WEIGHTS (overriding initializations) if available
    if assign_loaded_variables is not None:
        session.run(assign_loaded_variables)

    # -------------------------------------------------------------------------
    # Start training
    # -------------------------------------------------------------------------
    try:
        n_mbs = len(train_loader)
        # Epoch to resume from, derived from the global step
        # (samples seen so far / samples per epoch).
        epoch = int((step * batchsize) / (n_mbs * batchsize))
        epochs = range(epoch, n_epochs)

        if len(trainables) == 0:
            # Nothing to train: evaluate once and leave (the `finally` clause
            # still closes the session).
            validate(val_loader, n_classes, session, loss_val, prediction_val,
                     model, workspace, step, batchsize, tell)
            return

        print("Epoch: {}/{} (step: {}, nmbs: {}, batchsize: {})".format(
            epoch + 1, n_epochs, step, n_mbs, batchsize))
        for ep in epochs:
            if ep == epoch:
                # First epoch of this run: obtain the initial score. In all
                # later epochs `f1` is the score of the validation pass that
                # closed the previous epoch.
                if ep == 0 and initial_val:
                    f1 = validate(val_loader, n_classes, session, loss_val,
                                  prediction_val, model, workspace, step,
                                  batchsize, tell)
                elif (config.has_value("lrs_best")
                      and config.has_value("lrs_learning_rate")
                      and config.has_value("lrs_n_bad_epochs")):
                    # Restore the scheduler state stored in the config once,
                    # not on every epoch (that would reset the scheduler).
                    f1 = config.get_value("lrs_f1", 0)
                    lrs_best = config.get_value("lrs_best")
                    lrs_learning_rate = config.get_value("lrs_learning_rate")
                    lrs_n_bad_epochs = config.get_value("lrs_n_bad_epochs")
                else:
                    f1 = 0

            # LRS "Plateau"
            if lrs_plateu:
                # update scheduler
                if lrs_is_better(lrs_best, f1):
                    lrs_best = f1
                    lrs_n_bad_epochs = 0
                else:
                    lrs_n_bad_epochs += 1
                # update learning rate
                if lrs_n_bad_epochs > lrs_patience:
                    lrs_learning_rate = max(lrs_learning_rate * lrs_factor, 0)
                    lrs_n_bad_epochs = 0

            # Show the absolute epoch count so the display stays correct when
            # training is resumed at an epoch > 0.
            with tqdm(total=len(train_loader),
                      desc="Training [{}/{}]".format(ep + 1,
                                                     n_epochs)) as pbar:
                for mb in train_loader:
                    feed_dict = {
                        model.X: mb['input'].numpy(),
                        model.y_: mb['target'].numpy(),
                        model.dropout: dropout
                    }
                    # The learning rate placeholder only exists for the
                    # plateau schedule.
                    if lrs_plateu:
                        feed_dict[learning_rate] = lrs_learning_rate

                    # TRAINING
                    _, loss_train, _ = session.run(
                        [prediction, loss, update], feed_dict=feed_dict)

                    # Update status
                    pbar.set_description_str(
                        "Training [{}/{}] Loss: {:.4f}".format(
                            ep + 1, n_epochs, loss_train))
                    pbar.update()
                    step += 1

            # Keep the score: it drives the plateau scheduler in the next
            # epoch.
            f1 = validate(val_loader, n_classes, session, loss_val,
                          prediction_val, model, workspace, step, batchsize,
                          tell)
    except AbortRun:
        print("Aborting...")
    finally:
        tell.close(global_step=step, save_checkpoint=True)
Beispiel #18
0
    def __init__(self, config: Config, dataset):
        """Build a conv-encoder + ConvLSTM network with a convolutional output layer.

        The network is a stack of `enc_dec_depth` ConvLayer/MaxPoolingLayer
        pairs, followed by a ConvLSTM layer (or a plain ConvLayer if
        `config.n_lstm` is falsy), a ConvLayer producing `n_classes` feature
        maps and a ScalingLayer that resizes to the target frame size. The
        graph is unrolled over all sequence positions of the input and over
        `config.tickersteps` additional steps that re-use the last frame.

        Command-line usage:
            python3 samples/main_convlstm_mnist.py --config=samples/config_convlstm_mnist.json

        Args:
            config: Configuration object. Read via `get_value`:
                "enc_dec_depth", "enc_dec_conv_maps_base",
                "conv_W_initializer". Read as attributes: `kernel_conv`,
                `kernel_conv_out`, `n_lstm`, `kernel_lstm_fwd`,
                `kernel_lstm_bwd`, `reduced_rec_lstm`, `lstm_act`,
                `forgetgate`, `store_states`, `lstm_output_dropout`,
                `tickersteps`.
            dataset: Object whose `mb_info['X'][0]` and `mb_info['y'][0]` are
                the shape tuples of the input and label minibatches.

        Publishes `X`, `y_`, `output`, `pixel_weights` and `lstm_layer` as
        attributes and stores the layer list in a private attribute.
        """
        import TeLL

        #
        # Some convenience objects
        #
        # We will use a list to store all layers for regularization etc. (this is optional)
        layers = []

        # Number of encoder stages and base number of feature maps
        depth = config.get_value("enc_dec_depth", 2)
        basenr_convs = config.get_value("enc_dec_conv_maps_base", 16)
        # The weight initializer is looked up by name in TeLL.initializations
        init_name = config.get_value("conv_W_initializer",
                                     "weight_xavier_conv2d")
        conv_W_initializer = getattr(TeLL.initializations, init_name)

        #
        # Create placeholders for feeding an input frame and a label at each sequence position
        #
        X_shape = dataset.mb_info['X'][
            0]  # dataset.X_shape is [sample, seq_pos, x, y, features]
        y_shape = dataset.mb_info['y'][0]
        # Shape of a single frame: same as X_shape but with only one sequence position
        frame_input_shape = X_shape[:1] + (1, ) + X_shape[2:]
        # Number of output classes is hard-coded here
        n_classes = 11
        # Shape of a single output frame with an additional trailing class axis
        frame_output_shape = y_shape[:1] + (1, ) + y_shape[2:] + (n_classes, )
        n_seq_pos = X_shape[1]
        X = tf.placeholder(tf.float32, shape=X_shape)
        y_ = tf.placeholder(
            tf.int32,
            shape=y_shape)  # dataset.y_shape is [sample, seq_pos, features]

        # ----------------------------------------------------------------------------------------------------------
        # Define network architecture
        # ----------------------------------------------------------------------------------------------------------

        #
        # Initialize input to network of shape [sample, 1, x, y, features] with zero tensor of size of a frame
        #
        rnn_input_layer = RNNInputLayer(
            tf.zeros(frame_input_shape, dtype=tf.float32))
        layers.append(rnn_input_layer)

        #
        # Encoder and maxpooling layers
        #
        # NOTE(review): `encoders` is filled below but never read in this method.
        encoders = list()
        for d in range(1, depth + 1):
            print("\tConvLayerEncoder{}...".format(d))
            # Encoder stage d has basenr_convs * 2**d output feature maps
            layers.append(
                ConvLayer(incoming=layers[-1],
                          W=conv_W_initializer([
                              config.kernel_conv, config.kernel_conv,
                              layers[-1].get_output_shape()[-1],
                              basenr_convs * (2**d)
                          ]),
                          padding='SAME',
                          name='ConvLayerEncoder{}'.format(d),
                          a=tf.nn.elu))
            encoders.append(layers[-1])
            print("\tMaxpoolingLayer{}...".format(d))
            layers.append(
                MaxPoolingLayer(incoming=layers[-1],
                                ksize=(1, 3, 3, 1),
                                strides=(1, 2, 2, 1),
                                padding='SAME',
                                name='MaxpoolingLayer{}'.format(d)))

        #
        # ConvLSTM Layer
        #
        if config.n_lstm:
            n_lstm = config.n_lstm
            # Square kernels: the same size is used for the x and y extent
            lstm_x_fwd = config.kernel_lstm_fwd
            lstm_y_fwd = config.kernel_lstm_fwd
            lstm_x_bwd = config.kernel_lstm_bwd
            lstm_y_bwd = config.kernel_lstm_bwd

            # Forward weights see the encoder output; recurrent weights see either the LSTM output itself
            # or, if feature squashing is enabled, the squashed features
            lstm_input_channels_fwd = layers[-1].get_output_shape()[-1]
            if config.reduced_rec_lstm:
                lstm_input_channels_bwd = config.reduced_rec_lstm
            else:
                lstm_input_channels_bwd = n_lstm

            # Each W_* entry is a pair [forward weights, recurrent weights]
            lstm_init = dict(W_ci=[
                conv_W_initializer(
                    [lstm_x_fwd, lstm_y_fwd, lstm_input_channels_fwd, n_lstm]),
                conv_W_initializer(
                    [lstm_x_bwd, lstm_y_bwd, lstm_input_channels_bwd, n_lstm])
            ],
                             W_ig=[
                                 conv_W_initializer([
                                     lstm_x_fwd, lstm_y_fwd,
                                     lstm_input_channels_fwd, n_lstm
                                 ]),
                                 conv_W_initializer([
                                     lstm_x_bwd, lstm_y_bwd,
                                     lstm_input_channels_bwd, n_lstm
                                 ])
                             ],
                             W_og=[
                                 conv_W_initializer([
                                     lstm_x_fwd, lstm_y_fwd,
                                     lstm_input_channels_fwd, n_lstm
                                 ]),
                                 conv_W_initializer([
                                     lstm_x_bwd, lstm_y_bwd,
                                     lstm_input_channels_bwd, n_lstm
                                 ])
                             ],
                             W_fg=[
                                 conv_W_initializer([
                                     lstm_x_fwd, lstm_y_fwd,
                                     lstm_input_channels_fwd, n_lstm
                                 ]),
                                 conv_W_initializer([
                                     lstm_x_bwd, lstm_y_bwd,
                                     lstm_input_channels_bwd, n_lstm
                                 ])
                             ],
                             b_ci=constant([n_lstm]),
                             b_ig=constant([n_lstm]),
                             b_og=constant([n_lstm]),
                             # Only the forget gate bias gets an explicit value of 1
                             b_fg=constant([n_lstm], 1))

            print("\tConvLSTM...")
            layers.append(
                ConvLSTMLayer(incoming=layers[-1],
                              n_units=n_lstm,
                              **lstm_init,
                              a_out=get_rec_attr(tf, config.lstm_act),
                              forgetgate=config.forgetgate,
                              store_states=config.store_states,
                              tickerstep_biases=tf.zeros,
                              output_dropout=config.lstm_output_dropout,
                              precomp_fwds=False))
            lstm_layer = layers[-1]

            #
            # Optional feature squashing
            #
            ext_lstm_recurrence = None

            if config.reduced_rec_lstm:
                print("\tFeatureSquashing...")
                # Reduce the LSTM output to config.reduced_rec_lstm feature maps
                layers.append(
                    ConvLayer(incoming=layers[-1],
                              W=conv_W_initializer([
                                  config.kernel_conv_out,
                                  config.kernel_conv_out,
                                  layers[-1].get_output_shape()[-1],
                                  config.reduced_rec_lstm
                              ]),
                              padding='SAME',
                              name='ConvLayerFeatureSquashing',
                              a=tf.nn.elu))
                print("\tConvLSTMRecurrence...")
                ext_lstm_recurrence = layers[-1]

            # Use the squashed features as the recurrent input of the LSTM
            if ext_lstm_recurrence is not None:
                lstm_layer.add_external_recurrence(ext_lstm_recurrence)
        else:
            # No LSTM requested: a plain ConvLayer takes its place
            print("\tSubstituteConvLayer...")
            layers.append(
                ConvLayer(incoming=layers[-1],
                          W=conv_W_initializer([
                              config.kernel_conv, config.kernel_conv,
                              layers[-1].get_output_shape()[-1],
                              int(basenr_convs * (2**depth) * 4.5)
                          ]),
                          padding='SAME',
                          name='SubstituteConvLayer',
                          a=tf.nn.elu))
            # Published under the same attribute name as the LSTM would be
            lstm_layer = layers[-1]

        #
        # ConvLayer for semantic segmentation
        #
        print("\tConvLayerSemanticSegmentation...")
        # Identity activation: this layer outputs one raw feature map per class
        layers.append(
            ConvLayer(incoming=layers[-1],
                      W=conv_W_initializer([
                          config.kernel_conv_out, config.kernel_conv_out,
                          layers[-1].get_output_shape()[-1], n_classes
                      ]),
                      padding='SAME',
                      name='ConvLayerSemanticSegmentation',
                      a=tf.identity))

        #
        # Upscaling layer
        #
        print("\tUpscalingLayer...")
        # NOTE(review): this overwrites the last list entry instead of appending, so the segmentation ConvLayer
        # is no longer contained in `layers` (it stays reachable as the ScalingLayer's incoming layer) --
        # confirm this is intended, since `layers` is used for regularization.
        layers[-1] = ScalingLayer(incoming=layers[-1],
                                  size=frame_output_shape[-3:-1],
                                  name='UpscalingLayergLayer')

        output_layer = layers[-1]

        # ----------------------------------------------------------------------------------------------------------
        #  Create graph through sequence positions and ticker steps
        # ----------------------------------------------------------------------------------------------------------
        outputs_all_timesteps = []

        #
        # Loop through sequence positions
        #
        print("\tRNN Loop...")
        for seq_pos in range(n_seq_pos):
            with tf.name_scope("Sequence_pos_{}".format(seq_pos)):
                print("\t  seq. pos. {}...".format(seq_pos))
                # Set rnn input layer to current frame
                rnn_input_layer.update(X[:, seq_pos:seq_pos + 1, :])

                # Calculate new network state at new frame (this updates the network's hidden activations, cell states,
                # and dependencies automatically)
                output = output_layer.get_output()
                outputs_all_timesteps.append(output)

        #
        # Loop through tickersteps
        #
        # Use last frame as input during ticker steps
        tickerstep_input = X[:, -1:, :]

        for tickerstep in range(config.tickersteps):
            with tf.name_scope("Tickerstep_{}".format(tickerstep)):
                print("\t  tickerstep {}...".format(tickerstep))

                # Set rnn input layer to tickerstep input
                rnn_input_layer.update(tickerstep_input)

                # Calculate new network state at new frame and activate tickerstep biases
                output = output_layer.get_output(tickerstep_nodes=True)
                outputs_all_timesteps.append(output)

        print("\tDone!")

        #
        # Publish
        #
        self.X = X
        self.y_ = y_
        # Outputs of all sequence positions and tickersteps, concatenated along axis 1
        self.output = tf.concat(outputs_all_timesteps,
                                axis=1,
                                name='outputs_all_timesteps')
        # Uniform weights (all ones) with the shape of the labels
        pixel_weights = tf.ones_like(y_, dtype=tf.float32)
        # pixel_weights -= tf.cast(y_ == 0, dtype=tf.float32) * tf.constant(1.-0.2)
        self.pixel_weights = pixel_weights
        # We will use this list of layers for regularization in the main file
        # (double underscore: the attribute name is mangled with the class name)
        self.__layers = layers
        # We will plot some parts of the lstm, so we make it accessible as attribute
        self.lstm_layer = lstm_layer