Example #1
def invoke_dataset_from_config(config: Config, required: Union[str, list, tuple] = None):
    """
    Initializes datasets from config. Imports specified data reader and instantiates it with parameters from config.
    :param config: TeLL config object; readers are only created if it contains a "dataset" entry
    :param required: string, list or tuple specifying which datasets have to be loaded (e.g. ["train", "val"])
    :return: initialized data readers
    """
    # Initialize Data Reader if specified
    readers = {}
    if config.has_value("dataset"):
        def to_list(value):
            if value is None:
                result = []
            elif isinstance(value, str):
                result = [value]
            else:
                result = list(value)
            return result
        
        dataset = config.dataset
        required = to_list(required)
        
        try:
            reader_class = import_object(dataset["reader"])
            reader_args = inspect.signature(reader_class).parameters.keys()
            datasets = [key for key in dataset.keys() if key not in reader_args and key != "reader"]
            global_args = [key for key in dataset.keys() if key not in datasets and key != "reader"]
            
            # check for required datasets before loading anything
            missing = [d for d in required if d not in datasets]
            if len(missing) > 0:
                raise Exception("Missing required dataset(s) {}".format(missing))
            
            # read "global" parameters
            global_pars = {}
            for key in global_args:
                value = dataset[key]
                global_pars[key] = value
                if isinstance(value, str) and value.startswith("import::"):
                    global_pars[key] = import_object(value[len("import::"):])
                if key == "transforms":
                    global_pars[key] = Compose([invoke_functional_with_params(t) for t in value])
            
            # read dataset specific parameters
            for dset in datasets:
                # inspect parameters and resolve if necessary
                for key, value in dataset[dset].items():
                    if isinstance(value, str) and value.startswith("import::"):
                        dataset[dset][key] = import_object(value[len("import::"):])
                    if key == "transforms":
                        dataset[dset][key] = Compose([invoke_functional_with_params(t) for t in value])
                print("Loading dataset '{}'...".format(dset))
                readers[dset] = reader_class(**{**global_pars, **dataset[dset]})
        except (AttributeError, TypeError) as e:
            print("Unable to import '{}'".format(e))
            raise e
    return readers
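
The core of this helper is the merge of "global" and split-specific reader parameters in reader_class(**{**global_pars, **dataset[dset]}). A minimal, self-contained sketch of that rule follows; the DummyReader class and the config dict are purely illustrative, not part of TeLL:

# Standalone sketch of the parameter-merging rule used above: dataset keys that
# match the reader's constructor arguments act as "global" defaults, while
# per-split keys override them. All names here are illustrative only.
import inspect

class DummyReader:
    def __init__(self, data_file, num_classes=2):
        self.data_file, self.num_classes = data_file, num_classes

dataset_cfg = {
    "reader": "DummyReader",                 # normally a dotted path resolved via import_object
    "num_classes": 13,                       # "global" arg (matches a constructor parameter)
    "train": {"data_file": "train.h5"},      # per-split args
    "val": {"data_file": "val.h5", "num_classes": 5},
}

reader_args = inspect.signature(DummyReader).parameters.keys()
splits = [k for k in dataset_cfg if k not in reader_args and k != "reader"]
global_pars = {k: v for k, v in dataset_cfg.items() if k in reader_args}

readers = {s: DummyReader(**{**global_pars, **dataset_cfg[s]}) for s in splits}
print(readers["val"].num_classes)  # 5 -- the split-level value overrides the global one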
Example #2
def main(_):
    config = Config()
    np.random.seed(config.get_value("random_seed", 12345))

    # PARAMETERS
    n_epochs = config.get_value("epochs", 100)
    batchsize = config.get_value("batchsize", 8)
    n_classes = config.get_value("n_classes", 13)
    dropout = config.get_value("dropout", 0.25)  # TODO
    num_threads = config.get_value("num_threads", 5)
    initial_val = config.get_value("initial_val", True)

    # READER, LOADER
    readers = invoke_dataset_from_config(config)
    reader_train = readers["train"]
    reader_val = readers["val"]
    train_loader = torch.utils.data.DataLoader(reader_train,
                                               batch_size=batchsize,
                                               shuffle=True,
                                               num_workers=num_threads)
    val_loader = torch.utils.data.DataLoader(reader_val,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=num_threads)

    # CONFIG
    tell = TeLLSession(config=config,
                       model_params={"shape": reader_train.shape})
    # Get some members from the session for easier usage
    session = tell.tf_session
    model = tell.model
    workspace, config = tell.workspace, tell.config

    prediction = tf.sigmoid(model.output)
    prediction_val = tf.reduce_mean(tf.sigmoid(model.output),
                                    axis=0,
                                    keepdims=True)

    # LOSS
    if hasattr(model, "loss"):
        loss = model.loss()
    else:
        with tf.name_scope("Loss_per_Class"):
            loss = 0
            for i in range(n_classes):
                loss_batch = tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=model.output[:, i], labels=model.y_[:, i])
                loss_mean = tf.reduce_mean(loss_batch)
                loss += loss_mean

    # Validation loss after patching
    if hasattr(model, "loss"):
        loss_val = model.loss()
    else:
        with tf.name_scope("Loss_per_Class_Patching"):
            loss_val = 0
            for i in range(n_classes):
                loss_batch = tf.nn.sigmoid_cross_entropy_with_logits(
                    logits=tf.reduce_mean(model.output[:, i],
                                          axis=0,
                                          keepdims=True),
                    labels=model.y_[:, i])
                loss_mean = tf.reduce_mean(loss_batch)
                loss_val += loss_mean

    # REGULARIZATION
    reg_penalty = regularize(layers=model.layers,
                             l1=config.l1,
                             l2=config.l2,
                             regularize_weights=True,
                             regularize_biases=True)

    # LEARNING RATE (SCHEDULE)
    # if a LRS is defined always use MomentumOptimizer and pass learning rate to optimizer
    lrs_plateu = False
    if config.get_value("lrs", None) is not None:
        lr_sched_type = config.lrs["type"]
        if lr_sched_type == "plateau":
            lrs_plateu = True
            learning_rate = tf.placeholder(tf.float32, [],
                                           name='learning_rate')
            lrs_learning_rate = config.get_value(
                "optimizer_params")["learning_rate"]
            lrs_n_bad_epochs = 0  # counter for plateau LRS
            lrs_patience = config.lrs["patience"]
            lrs_factor = config.lrs["factor"]
            lrs_threshold = config.lrs["threshold"]
            lrs_mode = config.lrs["mode"]
            lrs_best = -np.inf if lrs_mode == "max" else np.inf
            if lrs_mode == "max":
                lrs_is_better = lambda old, new: new > old * (1 + lrs_threshold)
            else:
                lrs_is_better = lambda old, new: new < old * (1 - lrs_threshold)
    else:
        learning_rate = None  # if no LRS is defined the default optimizer is used with its defined learning rate

    # LOAD WEIGHTS and get list of trainables if specified
    assign_loaded_variables = None
    trainables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    if config.get_value("checkpoint", None) is not None:
        with Timer(name="Loading Checkpoint", verbose=True):
            assign_loaded_variables, trainables = tell.load_weights(
                config.get_value("checkpoint", None),
                config.get_value("freeze", False),
                config.get_value("exclude_weights", None),
                config.get_value("exclude_freeze", None))

    # Update step
    if len(trainables) > 0:
        update, gradients, gradient_name_dict = update_step(
            loss + reg_penalty,
            config,
            tell,
            lr=learning_rate,
            trainables=trainables)

    # INITIALIZE Tensorflow VARIABLES
    step = tell.initialize_tf_variables().global_step

    # ASSIGN LOADED WEIGHTS (overriding initializations) if available
    if assign_loaded_variables is not None:
        session.run(assign_loaded_variables)

    # -------------------------------------------------------------------------
    # Start training
    # -------------------------------------------------------------------------
    try:
        n_mbs = len(train_loader)
        epoch = int((step * batchsize) / (n_mbs * batchsize))
        epochs = range(epoch, n_epochs)

        if len(trainables) == 0:
            validate(val_loader, n_classes, session, loss_val, prediction_val,
                     model, workspace, step, batchsize, tell)
            return

        print("Epoch: {}/{} (step: {}, nmbs: {}, batchsize: {})".format(
            epoch + 1, n_epochs, step, n_mbs, batchsize))
        for ep in epochs:
            if ep == 0 and initial_val:
                f1 = validate(val_loader, n_classes, session, loss_val,
                              prediction_val, model, workspace, step,
                              batchsize, tell)
            else:
                if config.has_value("lrs_best") and config.has_value(
                        "lrs_learning_rate") and config.has_value(
                            "lrs_n_bad_epochs"):
                    f1 = config.get_value("lrs_f1")
                    lrs_best = config.get_value("lrs_best")
                    lrs_learning_rate = config.get_value("lrs_learning_rate")
                    lrs_n_bad_epochs = config.get_value("lrs_n_bad_epochs")
                else:
                    f1 = 0

            # LRS "Plateu"
            if lrs_plateu:
                # update scheduler
                if lrs_is_better(lrs_best, f1):
                    lrs_best = f1
                    lrs_n_bad_epochs = 0
                else:
                    lrs_n_bad_epochs += 1
                # update learning rate
                if lrs_n_bad_epochs > lrs_patience:
                    lrs_learning_rate = max(lrs_learning_rate * lrs_factor, 0)
                    lrs_n_bad_epochs = 0

            with tqdm(total=len(train_loader),
                      desc="Training [{}/{}]".format(ep + 1,
                                                     len(epochs))) as pbar:
                for mbi, mb in enumerate(train_loader):
                    # LRS "Plateu"
                    if lrs_plateu:
                        feed_dict = {
                            model.X: mb['input'].numpy(),
                            model.y_: mb['target'].numpy(),
                            model.dropout: dropout,
                            learning_rate: lrs_learning_rate
                        }
                    else:
                        feed_dict = {
                            model.X: mb['input'].numpy(),
                            model.y_: mb['target'].numpy(),
                            model.dropout: dropout
                        }

                    # TRAINING
                    pred, loss_train, _ = session.run(
                        [prediction, loss, update], feed_dict=feed_dict)

                    # Update status
                    pbar.set_description_str(
                        "Training [{}/{}] Loss: {:.4f}".format(
                            ep + 1, len(epochs), loss_train))
                    pbar.update()
                    step += 1

            validate(val_loader, n_classes, session, loss_val, prediction_val,
                     model, workspace, step, batchsize, tell)
    except AbortRun:
        print("Aborting...")
    finally:
        tell.close(global_step=step, save_checkpoint=True)
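
The "plateau" learning-rate schedule woven into the loop above can be read in isolation. Below is a minimal sketch of the same rule in "max" mode (monitoring, e.g., validation F1); the make_plateau_scheduler helper and its default values are illustrative and not part of the TeLL API:

def make_plateau_scheduler(lr, patience=3, factor=0.5, threshold=1e-3):
    """Reduce-on-plateau rule mirroring the lrs_* bookkeeping in main() (max mode)."""
    state = {"best": float("-inf"), "bad_epochs": 0, "lr": lr}

    def step(score):
        # an epoch counts as an improvement only if it beats the best score by a relative margin
        if score > state["best"] * (1 + threshold):
            state["best"], state["bad_epochs"] = score, 0
        else:
            state["bad_epochs"] += 1
        # after more than `patience` bad epochs in a row, decay the learning rate
        if state["bad_epochs"] > patience:
            state["lr"] = max(state["lr"] * factor, 0)
            state["bad_epochs"] = 0
        return state["lr"]

    return step

sched = make_plateau_scheduler(lr=1e-3, patience=2)
for f1 in [0.40, 0.41, 0.41, 0.41, 0.41, 0.41]:
    print(sched(f1))  # stays at 1e-3 until the score stagnates for more than 2 epochs, then halves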
Example #3
    def __init__(self, config: Config = None, summaries: list = ["training"], model_params=None):
        """
        Takes care of initializing a TeLL environment:
            Creates the working directory, instantiates the network architecture, and configures TensorFlow and TensorBoard.
            Furthermore, takes care of resuming runs from an existing workspace.

        :param config: Config
            config object or None; if None config will be initialized from command line parameter
        :param summaries: list
            List of names for summary writers, by default one writer named "training" is opened
        :param model_params:
            Optional dictionary of parameters unpacked and passed to model upon initialization if not None

        :returns:

        tf_session: Tensorflow session

        tf_saver: Tensorflow checkpoint saver

        tf_summaries: dictionary containing tensorflow summary writers, accessible via the names passed upon creation

        model: TeLL model

        step: current global step (0 for new runs otherwise step stored in checkpoint file)

        workspace: TeLL workspace instance

        config: TeLL config object
        """
        if config is None:
            config = Config()
        
        # Setup working dir
        workspace = Workspace(config.working_dir, config.specs, config.restore)
        print("TeLL workspace: {}".format(workspace.working_dir))
        # Import configured architecture
        architecture = config.import_architecture()
        # Set GPU
        os.environ["CUDA_VISIBLE_DEVICES"] = str(config.get_value("cuda_gpu", "0"))
        # Some Tensorflow configuration
        tf_config = tf.ConfigProto(
            inter_op_parallelism_threads=config.get_value("inter_op_parallelism_threads", 1),
            intra_op_parallelism_threads=config.get_value("intra_op_parallelism_threads", 1),
            log_device_placement=config.get_value("log_device_placement", False)
        )
        tf_config.gpu_options.allow_growth = config.get_value("tf_allow_growth", True)
        # Start Tensorflow session
        print("Starting session...")
        tf_session = tf.Session(config=tf_config)
        # Set tensorflow random seed
        set_seed(config.get_value("random_seed", 12345))
        #
        # Init Tensorboard
        #
        print("Initializing summaries...")
        summary_instances = {}
        for summary in summaries:
            summary_instances[summary] = tf.summary.FileWriter(os.path.join(workspace.get_tensorboard_dir(), summary),
                                                               tf_session.graph)
        
        # Initialize Model
        if model_params is None:
            model = architecture(config=config)
        else:
            model = architecture(config=config, **model_params)
        
        # Print number of trainable parameters
        n_trainable_params = np.sum([np.prod(t.get_shape()) for t in tf.trainable_variables()])
        print("Number of trainable parameters: {}".format(n_trainable_params))
        
        with tf.name_scope("TeLL") as tell_namescope:
            # Store global step in checkpoint
            tf_global_step = tf.Variable(initial_value=tf.constant(0, dtype=tf.int64), name="tell_global_step",
                                         dtype=tf.int64, trainable=False)
            # Define placeholder and operation to dynamically update tf_global_step with a python integer
            global_step_placeholder = tf.placeholder_with_default(tf_global_step, shape=tf_global_step.get_shape())
            set_global_step = tf_global_step.assign(global_step_placeholder)
        
        #
        # Add ops to save and restore all the variables
        #
        tf_saver = tf.train.Saver(max_to_keep=config.get_value("max_checkpoints", 10), sharded=False)
        # Expose members
        self.tf_session = tf_session
        self.tf_saver = tf_saver
        self.tf_summaries = summary_instances
        
        if config.has_value("optimizer"):
            if isinstance(config.optimizer, list):
                self.tf_optimizer = [getattr(tf.train, config.optimizer[i])(**config.optimizer_params[i])
                                     for i in range(len(config.optimizer))]
            else:
                self.tf_optimizer = getattr(tf.train, config.optimizer)(**config.optimizer_params)
        else:
            raise Exception(
                "Missing required parameter 'optimizer' (either specify in your config or on the command line)")
        
        self.model = model
        self.workspace = workspace
        self.config = config
        self.global_step = 0
        self.__global_step_placeholder = global_step_placeholder
        self.__global_step_update = set_global_step
        self.__tell_namescope = tell_namescope
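
For reference, Example #2 above drives this session roughly as follows (a condensed usage pattern, not runnable on its own; it assumes a config providing the architecture, optimizer, and dataset entries, and a reader_train built beforehand):

# Condensed usage pattern taken from Example #2 above.
tell = TeLLSession(config=Config(), model_params={"shape": reader_train.shape})
session, model = tell.tf_session, tell.model
workspace, config = tell.workspace, tell.config

step = tell.initialize_tf_variables().global_step    # 0 for new runs, restored step when resuming
# ... build loss and update ops, run training minibatches via session.run(...) ...
tell.close(global_step=step, save_checkpoint=True)    # save final checkpoint and shut down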