Example #1
            for key, value in zip(
                self.data_definitions.keys(),
                super(CIFAR10, self).collate_fn(batch).values())
        })


if __name__ == "__main__":
    """ Tests sequence generator - generates and displays a random sample"""

    # set the seeds
    np.random.seed(0)
    torch.manual_seed(0)

    # Load parameters.
    from miprometheus.utils.param_interface import ParamInterface
    params = ParamInterface()  # using the default values

    batch_size = 64

    # Create problem.
    cifar10 = CIFAR10(params)

    # get a sample
    sample = cifar10[0]
    print('__getitem__ works.\n')

    # wrap DataLoader on top of this Dataset subclass
    from torch.utils.data import DataLoader

    dataloader = DataLoader(dataset=cifar10,
                            collate_fn=cifar10.collate_fn,
Example #2
        data_dict = self.create_data_dict()
        data_dict['sequences'] = torch.from_numpy(inputs).type(self.app_state.dtype)
        data_dict['targets'] = torch.from_numpy(targets).type(self.app_state.dtype)
        data_dict['masks'] = ptmasks
        data_dict['sequences_length'] = torch.ones([batch_size,1]).type(torch.CharTensor) * seq_length
        data_dict['num_subsequences'] = torch.ones([batch_size, 1]).type(torch.CharTensor)
        return data_dict


if __name__ == "__main__":
    """ Tests sequence generator - generates and displays a random sample"""

    # "Loaded parameters".
    from miprometheus.utils.param_interface import ParamInterface

    params = ParamInterface()
    params.add_config_params({#'control_bits': 2,
                              #'data_bits': 8,
                              #'antisymmetry': True,
                              'hard' : True,
                              'min_sequence_length': 3,
                              'max_sequence_length': 5})
    batch_size = 64

    # Create problem object.
    seqsymcl = SequenceSymmetryCommandLines(params)

    # get a sample
    sample = seqsymcl[0]
    print(repr(sample))
    print('__getitem__ works.')
Example #3
        self.use_mask = params["use_mask"]

    def evaluate_loss(self, data_dict, logits):
        """ Calculates accuracy equal to mean number of correct predictions in a given batch.
        WARNING: Applies mask to both logits and targets!

        :param data_dict: DataDict({'sequences', 'sequences_length', 'targets', 'mask'}).

        :param logits: Predictions being output of the model.

        """
        # Check if the mask should be used - if so, use the masked loss function.
        if self.use_mask:
            loss = self.loss_function(
                logits, data_dict['targets'], data_dict['masks'])
        else:
            loss = self.loss_function(logits, data_dict['targets'])

        return loss


if __name__ == '__main__':

    from miprometheus.utils.param_interface import ParamInterface

    sample = SeqToSeqProblem(ParamInterface())[0]
    # equivalent to SeqToSeqProblem(params={}).__getitem__(index=0)

    print(repr(sample))
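# Illustrative sketch (not part of the example above): one way the masked loss used in
# evaluate_loss() could look. The class name and the choice of BCE-with-logits are
# assumptions for illustration, not the library's actual implementation; 'masks' is
# assumed to mark the time steps that should contribute to the loss.
import torch.nn as nn

class MaskedBCELoss(nn.Module):
    def __init__(self):
        super(MaskedBCELoss, self).__init__()
        self.bce = nn.BCEWithLogitsLoss()

    def forward(self, logits, targets, masks):
        # Keep only the masked positions of both logits and targets.
        masks = masks.bool()
        return self.bce(logits[masks], targets[masks])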
Example #4
class GridWorker(object):
    """
    Base abstract class for the grid workers.
    All grid workers should subclass it and override the relevant methods.
    """
    def __init__(self, name="GridWorker", use_gpu=False):
        """
        Base constructor for all grid workers:

            - Initializes the AppState singleton:

                >>> self.app_state = AppState()

            - Initializes the Parameter Registry:

                >>> self.params = ParamInterface()

            - Defines the logger:

                >>> self.logger = logging.getLogger(name=self.name)

            - Creates parser and adds default worker command line arguments (you can display them with ``--h``).

        :param name: Name of the worker (DEFAULT: "GridWorker").
        :type name: str

        :param use_gpu: Indicates whether the worker should use GPU or not. Value coming from the subclasses \
         (e.g. ``GridTrainerCPU`` vs ``GridTrainerGPU``) (DEFAULT: False).
        :type use_gpu: bool

        """
        # Call base constructor.
        super(GridWorker, self).__init__()

        # Set worker name.
        self.name = name

        # Initialize the application state singleton.
        self.app_state = AppState()
        self.app_state.use_CUDA = use_gpu

        # Initialize parameter interface/registry.
        self.params = ParamInterface()

        # Load the default logger configuration.
        logger_config = {
            'version': 1,
            'disable_existing_loggers': False,
            'formatters': {
                'simple': {
                    'format':
                    '[%(asctime)s] - %(levelname)s - %(name)s >>> %(message)s',
                    'datefmt': '%Y-%m-%d %H:%M:%S'
                }
            },
            'handlers': {
                'console': {
                    'class': 'logging.StreamHandler',
                    'level': 'INFO',
                    'formatter': 'simple',
                    'stream': 'ext://sys.stdout'
                }
            },
            'root': {
                'level': 'DEBUG',
                'handlers': ['console']
            }
        }

        logging.config.dictConfig(logger_config)

        # Create the Logger, set its label and logging level.
        self.logger = logging.getLogger(name=self.name)

        # Create parser with a list of runtime arguments.
        self.parser = argparse.ArgumentParser(
            formatter_class=argparse.RawTextHelpFormatter)

        # Add arguments to the specific parser.
        # These arguments will be shared by all grid workers.
        self.parser.add_argument(
            '--outdir',
            dest='outdir',
            type=str,
            default="./experiments",
            help=
            'Path to the global output directory where the experiments folders '
            'will be / are stored. Affects all grid experiments.'
            ' (DEFAULT: ./experiments)')

        self.parser.add_argument(
            '--savetag',
            dest='savetag',
            type=str,
            default='',
            help='Additional tag for the global output directory.')

        self.parser.add_argument(
            '--ll',
            action='store',
            dest='log_level',
            type=str,
            default='INFO',
            choices=[
                'CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG', 'NOTSET'
            ],
            help="Log level for the experiments. (Default: INFO)")

        self.parser.add_argument(
            '--li',
            dest='logging_interval',
            default=100,
            type=int,
            help=
            'Statistics logging interval. Will impact logging to the logger and exporting to '
            'TensorBoard for the experiments. Does not affect the grid worker. '
            'Writing to the csv file is not impacted (interval of 1).'
            ' (Default: 100, i.e. logs every 100 episodes).')

        self.parser.add_argument(
            '--agree',
            dest='confirm',
            action='store_true',
            help='Request user confirmation before starting the grid experiment.'
            '  (Default: False)')

    def setup_grid_experiment(self):
        """
        Sets up the overall grid of experiments.

        Base method:

            - Parses command line arguments,

            - Sets the 3 default sections (training / validation / test) for the param registry, \
            sets seeds to unspecified and disables multiprocessing. Also saves the specified ``cuda`` key.

        .. note::

            Child classes should override this method, but still call its parent to draw the basic functionality \
            implemented here.

        """
        # Parse arguments.
        self.flags, self.unparsed = self.parser.parse_known_args()

        # Set logger depending on the settings.
        self.logger.setLevel(
            getattr(logging, self.flags.log_level.upper(), None))

        # add empty sections
        self.params.add_default_params({"training": {}})
        self.params.add_default_params({"validation": {}})
        self.params.add_default_params({"testing": {}})

        # set seeds to undefined (-1), pass CUDA value and deactivate multiprocessing for `DataLoader`.
        # It is important not to set the seeds here as they would be identical for all experiments.
        self.params["training"].add_default_params({
            "seed_numpy": -1,
            "seed_torch": -1,
            "dataloader": {
                'num_workers': 0
            }
        })

        self.params["validation"].add_default_params(
            {"dataloader": {
                'num_workers': 0
            }})

        self.params["testing"].add_default_params({
            "seed_numpy": -1,
            "seed_torch": -1,
            "dataloader": {
                'num_workers': 0
            }
        })
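    # Illustrative sketch (hypothetical subclass, not part of the original code): child
    # workers are expected to call this base implementation first and then add their own
    # setup, e.g.:
    #
    #   class MyGridTrainer(GridWorker):
    #       def setup_grid_experiment(self):
    #           super(MyGridTrainer, self).setup_grid_experiment()
    #           # ... load grid configuration files, create output directories, etc.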

    @abstractmethod
    def run_grid_experiment(self):
        """
Example #5
            Q[i * self.NUM_QUESTIONS:(i + 1) * self.NUM_QUESTIONS,
              1, obj.color] = True
            # Query.
            Q[i * self.NUM_QUESTIONS:(i + 1) * self.NUM_QUESTIONS, 2,
              :num_bits] = query_matrix[:self.NUM_QUESTIONS, :num_bits]

        return Q


if __name__ == "__main__":
    """ Tests Shape-Color-Query - generates and displays a sample"""

    # "Loaded parameters".
    from miprometheus.utils.param_interface import ParamInterface

    params = ParamInterface()
    params.add_default_params({'data_folder': '~/data/shape-color-query/',
                               'split': 'train',
                               'regenerate': False,
                               'dataset_size': 10000,
                               'img_size': 128})

    # create problem
    shapecolorquery = ShapeColorQuery(params)

    batch_size = 64
    print('Number of episodes to run to cover the set once: {}'.format(shapecolorquery.get_epoch_size(batch_size)))

    # get a sample
    sample = shapecolorquery[0]
    print(repr(sample))
Example #6
            self.app_state.dtype)
        data_dict['masks'] = ptmasks
        data_dict['sequences_length'] = torch.ones([batch_size, 1]).type(
            torch.CharTensor) * seq_length
        data_dict['num_subsequences'] = torch.ones([batch_size,
                                                    1]).type(torch.CharTensor)
        return data_dict


if __name__ == "__main__":
    """ Tests sequence generator - generates and displays a random sample"""

    # "Loaded parameters".
    from miprometheus.utils.param_interface import ParamInterface

    params = ParamInterface()
    params.add_config_params({  #'control_bits': 4,
        #'data_bits': 8,
        'min_sequence_length': 1,
        'max_sequence_length': 10
    })
    batch_size = 64

    # Create problem object.
    repeatserialrecallcl = RepeatSerialRecallCommandLines(params)

    # get a sample
    sample = repeatserialrecallcl[0]
    print(repr(sample))
    print('__getitem__ works.')
Example #7
            self.app_state.dtype)
        data_dict['masks'] = ptmasks
        data_dict['sequences_length'] = torch.ones([batch_size, 1]).type(
            torch.CharTensor) * seq_length
        data_dict['num_subsequences'] = torch.ones([batch_size,
                                                    1]).type(torch.CharTensor)
        return data_dict


if __name__ == "__main__":
    """ Tests sequence generator - generates and displays a random sample"""

    # "Loaded parameters".
    from miprometheus.utils.param_interface import ParamInterface

    params = ParamInterface()
    params.add_config_params({  #'control_bits': 4,
        #'data_bits': 8,
        # 'randomize_control_lines': False,
        'min_sequence_length': 1,
        'max_sequence_length': 10
    })
    batch_size = 64

    # Create problem object.
    repeatreverserecallcl = RepeatReverseRecallCommandLines(params)

    # get a sample
    sample = repeatreverserecallcl[0]
    print(repr(sample))
    print('__getitem__ works.')
Example #8
        return data_dict, logits


if __name__ == "__main__":
    """
    Problem class Unit Test.
    """

    eng_prefixes = ("i am ", "i m ", "he is", "he s ", "she is", "she s",
                    "you are", "you re ", "we are", "we re ", "they are",
                    "they re ")

    # Load parameters.
    from miprometheus.utils.param_interface import ParamInterface
    params = ParamInterface()
    params.add_default_params({
        'training_size': 0.9,
        'output_lang_name': 'fra',
        'max_sequence_length': 15,
        'embedding_dim': 256,
        'eng_prefixes': eng_prefixes,
        'use_train_data': True,
        'data_folder': '~/data/language',
        'reverse': False
    })

    batch_size = 64

    # Create problem.
    translation = TranslationAnki(params)
Example #9
        file_folder_to_check = os.path.expanduser(file_folder_to_check)
        if not (os.path.isfile(file_folder_to_check) or os.path.isdir(file_folder_to_check)):
            self.logger.info('Downloading {}'.format(url))
            urllib.request.urlretrieve(url, os.path.expanduser(download_name), reporthook)
            return True
        else:
            self.logger.info('Dataset found at {}'.format(file_folder_to_check))
            return False


if __name__ == '__main__':
    """Unit test for Problem and DataDict"""
    from miprometheus.utils.param_interface import ParamInterface

    params = ParamInterface()

    problem = Problem(params)
    problem.data_definitions = {'inputs': {'size': [-1, -1], 'type': [torch.Tensor]},
                                'targets': {'size': [-1], 'type': [torch.Tensor]}
                                }
    problem.loss_function = torch.nn.CrossEntropyLoss()  # torch.nn.L1Loss, torch.nn.TripletMarginLoss

    datadict = DataDict({key: None for key in problem.data_definitions.keys()})

    # datadict['inputs'] = torch.ones([64, 20, 512]).type(torch.FloatTensor)
    # datadict['targets'] = torch.ones([64, 20]).type(torch.FloatTensor)

    # print(repr(datadict))

Example #10
                "The specified class '{}' is not derived from the nn.Module class"
                .format(name))
            exit(-1)

        # Ok, proceed.
        logger.info('Loading the {} controller from {}'.format(
            name, controller_class.__module__))

        # return the instantiated controller class
        return controller_class(params)


if __name__ == "__main__":
    """
    Tests ControllerFactory.
    """
    from miprometheus.utils.param_interface import ParamInterface

    controller_params = ParamInterface()
    controller_params.add_default_params({
        'name': 'RNNController',
        'input_size': 11,
        'output_size': 11,
        'hidden_state_size': 20,
        'num_layers': 1,
        'non_linearity': 'sigmoid'
    })

    controller = ControllerFactory.build_controller(controller_params)
    print(type(controller))
Example #11
        answer_string = [list(self.answer_dic.keys())[list(self.answer_dic.values()).index(
                answers[batch_num].data)] for batch_num in range(batch_size)]

        data_dict['targets_string'] = answer_string
        data_dict['predictions_string'] = prediction_string
        data_dict['clevr_dir'] = self.data_folder

        return data_dict, logits


if __name__ == "__main__":
    """Unit test that generates a batch and displays a sample."""

    from miprometheus.utils.param_interface import ParamInterface
    params = ParamInterface()
    params.add_default_params({'settings': {'data_folder': '~/Downloads/CLEVR_v1.0',
                               'set': 'train',
                               'dataset_variant': 'CLEVR'},

                               'images': {'raw_images': False,
                                          'feature_extractor': {'cnn_model': 'resnet101',
                                                                'num_blocks': 4}},

                               'questions': {'embedding_type': 'random', 'embedding_dim': 300}})

    # create problem
    clevr_dataset = CLEVR(params)

    batch_size = 64
    print('Number of episodes to run to cover the set once: {}'.format(clevr_dataset.get_epoch_size(batch_size)))
Example #12
        plt.imshow(image, interpolation='nearest', aspect='auto')

        # Plot!
        plt.show()


if __name__ == '__main__':
    # Set visualization.
    from miprometheus.utils.app_state import AppState
    AppState().visualize = True

    from miprometheus.utils.param_interface import ParamInterface
    from torch.utils.data.dataloader import DataLoader
    from miprometheus.problems import CIFAR10

    problem_params = ParamInterface()
    problem_params.add_config_params({'use_train_data': True,
                                      'root_dir': '~/data/cifar10',
                                      'padding': [0, 0, 0, 0],
                                      'up_scaling': True})
    batch_size = 64

    # create problem
    problem = CIFAR10(problem_params)
    print('Problem {} instantiated.'.format(problem.name))

    # instantiate DataLoader object
    dataloader = DataLoader(problem, batch_size=batch_size, collate_fn=problem.collate_fn)

    # Test base model.
    from miprometheus.utils.param_interface import ParamInterface
Example #13
        if image.shape[0] == 1:
            # This is a single channel image - get rid of this dimension
            image = np.squeeze(image, axis=0)
        else:
            # More channels - move channels to axis2, according to matplotlib documentation.
            # (X : array_like, shape (n, m) or (n, m, 3) or (n, m, 4))
            image = image.transpose(1, 2, 0)

        # show data.
        plt.xlabel('num_columns')
        plt.ylabel('num_rows')
        plt.title('Target class: {} ({}), {}th in Sequence'.format(
            label, target, sequence_number))
        plt.imshow(image,
                   interpolation='nearest',
                   aspect='auto',
                   cmap='gray_r')

        # Plot!
        plt.show()


if __name__ == '__main__':

    from miprometheus.utils.param_interface import ParamInterface

    sample = VideoToClassProblem(ParamInterface())[0]
    # equivalent to VideoToClassProblem(params={}).__getitem__(index=0)

    print(repr(sample))
Example #14
                .format(name))
            exit(-1)

        # Ok, proceed.
        logger.info('Loading the {} problem from {}'.format(
            name, problem_class.__module__))
        # return the instantiated problem class
        return problem_class(params)


if __name__ == "__main__":
    """
    Tests ProblemFactory.
    """
    from miprometheus.utils.param_interface import ParamInterface
    params = ParamInterface()
    params.add_default_params({
        'name': 'SerialRecall',
        'control_bits': 3,
        'data_bits': 8,
        'batch_size': 1,
        'min_sequence_length': 1,
        'max_sequence_length': 10,
        'num_subseq_min': 1,
        'num_subseq_max': 5,
        'bias': 0.5
    })

    problem = ProblemFactory.build_problem(params)
    print(type(problem))
Example #15
                   interpolation='nearest', aspect='auto')

        # Plot!
        plt.show()


if __name__ == '__main__':
    """ Tests MultiHopsStackedAttentionNetwork on ShapeColorQuery"""

    # "Loaded parameters".
    from miprometheus.utils.param_interface import ParamInterface
    from miprometheus.utils.app_state import AppState
    app_state = AppState()
    app_state.visualize = False
    from miprometheus.problems import ShapeColorQuery
    problem_params = ParamInterface()
    problem_params.add_config_params({'data_folder': '~/data/shape-color-query/',
                                      'split': 'train',
                                      'regenerate': False,
                                      'dataset_size': 10000,
                                      'img_size': 128})

    # create problem
    shapecolorquery = ShapeColorQuery(problem_params)

    batch_size = 64

    # wrap DataLoader on top of this Dataset subclass
    from torch.utils.data import DataLoader

    dataloader = DataLoader(dataset=shapecolorquery, collate_fn=shapecolorquery.collate_fn,
Example #16
if __name__ == '__main__':
    dim = 512
    embed_hidden = 300
    max_step = 12
    self_attention = True
    memory_gate = True
    nb_classes = 28
    dropout = 0.15

    from miprometheus.utils.app_state import AppState
    from miprometheus.utils.param_interface import ParamInterface
    from torch.utils.data import DataLoader
    app_state = AppState()

    from miprometheus.problems import CLEVR
    problem_params = ParamInterface()
    problem_params.add_config_params({
        'settings': {
            'data_folder': '~/Downloads/CLEVR_v1.0',
            'set': 'train',
            'dataset_variant': 'CLEVR'
        },
        'images': {
            'raw_images': False,
            'feature_extractor': {
                'cnn_model': 'resnet101',
                'num_blocks': 4
            }
        },
        'questions': {
            'embedding_type': 'random',
Example #17
        logits = torch.stack(logits, 1)
        return logits


if __name__ == "__main__":
    # Set logging level.
    import logging
    logging.basicConfig(level=logging.DEBUG)

    # Set visualization.
    from miprometheus.utils.app_state import AppState
    AppState().visualize = True

    # "Loaded parameters".
    from miprometheus.utils.param_interface import ParamInterface
    params = ParamInterface()
    params.add_default_params({
        'encoding_bit': 0,
        'solving_bit': 1,
        # controller parameters
        'controller': {
            'name': 'RNNController',
            'hidden_state_size': 20,
            'num_layers': 1,
            'non_linearity': 'sigmoid'
        },
        'mae_interface': {
            'shift_size': 3
        },  # encoder interface parameters
        'mas_interface': {
            'shift_size': 3
Example #18
        # Plot figure and list of frames.
        self.plotWindow.update(fig, frames)


if __name__ == '__main__':
    """Unit test of the SequentialModel"""

    from miprometheus.utils.param_interface import ParamInterface
    from miprometheus.utils.app_state import AppState

    # Set visualization.
    AppState().visualize = True

    # Test sequential model.
    sequential_model = SequentialModel(ParamInterface())

    # Set logging level.
    import logging
    logging.basicConfig(level=logging.DEBUG)

    while True:
        # Generate new sequence.
        x = np.random.binomial(1, 0.5, (1, 8, 15))
        y = np.random.binomial(1, 0.5, (1, 8, 15))
        z = np.random.binomial(1, 0.5, (1, 8, 15))

        # Transform to PyTorch.
        x = torch.from_numpy(x).type(torch.FloatTensor)
        y = torch.from_numpy(y).type(torch.FloatTensor)
        z = torch.from_numpy(z).type(torch.FloatTensor)
Example #19
        target = targets[sequence_number]
        label = labels[sequence_number]

        # Reshape image.
        if image.shape[0] == 1:
            # This is a single channel image - get rid of this dimension
            image = np.squeeze(image, axis=0)
        else:
            # More channels - move channels to axis2, according to matplotlib documentation.
            # (X : array_like, shape (n, m) or (n, m, 3) or (n, m, 4))
            image = image.transpose(1, 2, 0)

        # show data.
        plt.xlabel('num_columns')
        plt.ylabel('num_rows')
        plt.title('Target: {} ({}), {}th in Sequence, Question: {}'.format(
            label, target, sequence_number, question))
        plt.imshow(image, interpolation='nearest', aspect='auto')

        # Plot!
        plt.show()


if __name__ == '__main__':

    from miprometheus.utils.param_interface import ParamInterface

    sample = VQAProblem(ParamInterface())[0]

    print(repr(sample))
Example #20
        """

        return DataDict({key: value for key, value in zip(self.data_definitions.keys(),
                                                          super(CIFAR10, self).collate_fn(batch).values())})


if __name__ == "__main__":
    """ Tests sequence generator - generates and displays a random sample"""

    # set the seeds
    np.random.seed(0)
    torch.manual_seed(0)

    # Load parameters.
    from miprometheus.utils.param_interface import ParamInterface
    params = ParamInterface()
    params.add_default_params({'use_train_data': True,
                               'padding': [0, 0, 0, 0],
                               'up_scaling': False
                                })
    batch_size = 64

    # Create problem.
    cifar10 = CIFAR10(params)

    # get a sample
    sample = cifar10[0]
    print('__getitem__ works.\n')

    # wrap DataLoader on top of this Dataset subclass
    from torch.utils.data.dataloader import DataLoader
Example #21
            self.app_state.dtype)
        data_dict['masks'] = ptmasks
        data_dict['sequences_length'] = torch.ones([batch_size, 1]).type(
            torch.CharTensor) * seq_length
        data_dict['num_subsequences'] = torch.ones([batch_size,
                                                    1]).type(torch.CharTensor)
        return data_dict


if __name__ == "__main__":
    """ Tests sequence generator - generates and displays a random sample"""

    # "Loaded parameters".
    from miprometheus.utils.param_interface import ParamInterface

    params = ParamInterface()
    params.add_config_params({  #'control_bits': 2,
        #'data_bits': 8,
        #'inequality': True,
        'hard': True,
        'min_sequence_length': 2,
        'max_sequence_length': 5
    })
    batch_size = 64

    # Create problem object.
    seqequacl = SequenceEqualityCommandLines(params)

    # get a sample
    sample = seqequacl[0]
    print(repr(sample))
Example #22
        logits = torch.stack(logits, 1)
        return logits


if __name__ == "__main__":
    # Set logging level.
    logger = logging.getLogger('EncoderSolverNTM')
    logging.basicConfig(level=logging.DEBUG)

    from miprometheus.utils.param_interface import ParamInterface
    # Set visualization.
    from miprometheus.utils.app_state import AppState
    AppState().visualize = True

    # "Loaded parameters".
    params = ParamInterface()
    params.add_default_params({
        # controller parameters
        'controller': {
            'name': 'RNNController',
            'hidden_state_size': 20,
            'num_layers': 1,
            'non_linearity': 'sigmoid'
        },
        # interface parameters
        'interface': {
            'num_read_heads': 1,
            'shift_size': 3
        },
        # memory parameters
        'memory': {
Example #23
    def __init__(self, name="GridWorker", use_gpu=False):
        """
        Base constructor for all grid workers:

            - Initializes the AppState singleton:

                >>> self.app_state = AppState()

            - Initializes the Parameter Registry:

                >>> self.params = ParamInterface()

            - Defines the logger:

                >>> self.logger = logging.getLogger(name=self.name)

            - Creates parser and adds default worker command line arguments (you can display them with ``--h``).

        :param name: Name of the worker (DEFAULT: "GridWorker").
        :type name: str

        :param use_gpu: Indicates whether the worker should use GPU or not. Value coming from the subclasses \
         (e.g. ``GridTrainerCPU`` vs ``GridTrainerGPU``) (DEFAULT: False).
        :type use_gpu: bool

        """
        # Call base constructor.
        super(GridWorker, self).__init__()

        # Set worker name.
        self.name = name

        # Initialize the application state singleton.
        self.app_state = AppState()
        self.app_state.use_CUDA = use_gpu

        # Initialize parameter interface/registry.
        self.params = ParamInterface()

        # Load the default logger configuration.
        logger_config = {
            'version': 1,
            'disable_existing_loggers': False,
            'formatters': {
                'simple': {
                    'format':
                    '[%(asctime)s] - %(levelname)s - %(name)s >>> %(message)s',
                    'datefmt': '%Y-%m-%d %H:%M:%S'
                }
            },
            'handlers': {
                'console': {
                    'class': 'logging.StreamHandler',
                    'level': 'INFO',
                    'formatter': 'simple',
                    'stream': 'ext://sys.stdout'
                }
            },
            'root': {
                'level': 'DEBUG',
                'handlers': ['console']
            }
        }

        logging.config.dictConfig(logger_config)

        # Create the Logger, set its label and logging level.
        self.logger = logging.getLogger(name=self.name)

        # Create parser with a list of runtime arguments.
        self.parser = argparse.ArgumentParser(
            formatter_class=argparse.RawTextHelpFormatter)

        # Add arguments to the specific parser.
        # These arguments will be shared by all grid workers.
        self.parser.add_argument(
            '--outdir',
            dest='outdir',
            type=str,
            default="./experiments",
            help=
            'Path to the global output directory where the experiments folders '
            'will be / are stored. Affects all grid experiments.'
            ' (DEFAULT: ./experiments)')

        self.parser.add_argument(
            '--savetag',
            dest='savetag',
            type=str,
            default='',
            help='Additional tag for the global output directory.')

        self.parser.add_argument(
            '--ll',
            action='store',
            dest='log_level',
            type=str,
            default='INFO',
            choices=[
                'CRITICAL', 'ERROR', 'WARNING', 'INFO', 'DEBUG', 'NOTSET'
            ],
            help="Log level for the experiments. (Default: INFO)")

        self.parser.add_argument(
            '--li',
            dest='logging_interval',
            default=100,
            type=int,
            help=
            'Statistics logging interval. Will impact logging to the logger and exporting to '
            'TensorBoard for the experiments. Does not affect the grid worker. '
            'Writing to the csv file is not impacted (interval of 1).'
            ' (Default: 100, i.e. logs every 100 episodes).')

        self.parser.add_argument(
            '--agree',
            dest='confirm',
            action='store_true',
            help='Request user confirmation before starting the grid experiment.'
            '  (Default: False)')
        """

        return DataDict({
            key: value
            for key, value in zip(
                self.data_definitions.keys(),
                super(SequentialPixelMNIST, self).collate_fn(batch).values())
        })


if __name__ == "__main__":
    """ Tests sequence generator - generates and displays a random sample"""

    # Load parameters.
    from miprometheus.utils.param_interface import ParamInterface
    params = ParamInterface()
    params.add_default_params({
        'use_train_data': True,
        'root_dir': '~/data/mnist'
    })

    batch_size = 64

    # Create problem.
    problem = SequentialPixelMNIST(params)

    # get a sample
    sample = problem[0]
    print(repr(sample))

    # test whether data structures match expected definitions
Example #25
        return data_dict

    # method for changing the maximum length, used mainly during curriculum
    # learning
    def set_max_length(self, max_length):
        self.max_sequence_length = max_length


if __name__ == "__main__":
    """ Tests sequence generator - generates and displays a random sample"""

    # "Loaded parameters".
    from miprometheus.utils.param_interface import ParamInterface

    params = ParamInterface()
    params.add_config_params({'control_bits': 2,
                              'data_bits': 8,
                              'batch_size': 2,
                              'min_sequence_length': 1,
                              'max_sequence_length': 10,
                              'num_subseq_min': 4,
                              'num_subseq_max': 4})
    batch_size = 64

    # Create problem object.
    readingspan = ReadingSpan(params)

    # get a sample
    sample = readingspan[0]
    print(repr(sample))
Example #26
class Worker(object):
    """
    Base abstract class for the workers.
    All base workers should subclass it and override the relevant methods.
    """
    def __init__(self, name, add_default_parser_args=True):
        """
        Base constructor for all workers:

            - Initializes the AppState singleton:

                >>> self.app_state = AppState()

            - Initializes the Parameter Registry:

                >>> self.params = ParamInterface()

            - Defines the logger:

                >>> self.logger = logging.getLogger(name=self.name)

            - Creates parser and adds default worker command line arguments.

        :param name: Name of the worker.
        :type name: str

        :param add_default_parser_args: If set, adds default parser arguments (DEFAULT: True).
        :type add_default_parser_args: bool

        """
        # Call base constructor.
        super(Worker, self).__init__()

        # Set worker name.
        self.name = name

        # Initialize the application state singleton.
        self.app_state = AppState()

        # Initialize parameter interface/registry.
        self.params = ParamInterface()

        # Initialize logger using the configuration.
        self.initialize_logger()

        # Create parser with a list of runtime arguments.
        self.parser = argparse.ArgumentParser(
            formatter_class=argparse.RawTextHelpFormatter)

        # Add arguments to the specific parser.
        if add_default_parser_args:
            # These arguments will be shared by all basic workers.
            self.parser.add_argument(
                '--config',
                dest='config',
                type=str,
                default='',
                help='Name of the configuration file(s) to be loaded. '
                'If specifying more than one file, they must be separated with a comma ",".'
            )

            self.parser.add_argument(
                '--model',
                type=str,
                default='',
                dest='model',
                help='Path to the file containing the saved parameters'
                ' of the model to load (model checkpoint, should end with a .pt extension.)'
            )

            self.parser.add_argument(
                '--gpu',
                dest='use_gpu',
                action='store_true',
                help=
                'The current worker will move the computations on GPU devices, if available '
                'in the system. (Default: False)')

            self.parser.add_argument(
                '--expdir',
                dest='expdir',
                type=str,
                default="./experiments",
                help=
                'Path to the directory where the experiment(s) folders are/will be stored.'
                ' (DEFAULT: ./experiments)')

            self.parser.add_argument('--savetag',
                                     dest='savetag',
                                     type=str,
                                     default='',
                                     help='Tag for the save directory.')

            self.parser.add_argument('--ll',
                                     action='store',
                                     dest='log_level',
                                     type=str,
                                     default='INFO',
                                     choices=[
                                         'CRITICAL', 'ERROR', 'WARNING',
                                         'INFO', 'DEBUG', 'NOTSET'
                                     ],
                                     help="Log level. (Default: INFO)")

            self.parser.add_argument(
                '--li',
                dest='logging_interval',
                default=100,
                type=int,
                help=
                'Statistics logging interval. Will impact logging to the logger and '
                'exporting to TensorBoard. Writing to the csv file is not impacted '
                '(interval of 1).(Default: 100, i.e. logs every 100 episodes).'
            )

            self.parser.add_argument(
                '--agree',
                dest='confirm',
                action='store_true',
                help=
                'Request user confirmation just after loading the settings, '
                'before starting training. (Default: False)')

    def initialize_logger(self):
        """
        Initializes the logger, with a specific configuration:

        >>> logger_config = {'version': 1,
        >>>                  'disable_existing_loggers': False,
        >>>                  'formatters': {
        >>>                      'simple': {
        >>>                          'format': '[%(asctime)s] - %(levelname)s - %(name)s >>> %(message)s',
        >>>                          'datefmt': '%Y-%m-%d %H:%M:%S'}},
        >>>                  'handlers': {
        >>>                      'console': {
        >>>                          'class': 'logging.StreamHandler',
        >>>                          'level': 'INFO',
        >>>                          'formatter': 'simple',
        >>>                          'stream': 'ext://sys.stdout'}},
        >>>                  'root': {'level': 'DEBUG',
        >>>                           'handlers': ['console']}}

        """
        # Load the default logger configuration.
        logger_config = {
            'version': 1,
            'disable_existing_loggers': False,
            'formatters': {
                'simple': {
                    'format':
                    '[%(asctime)s] - %(levelname)s - %(name)s >>> %(message)s',
                    'datefmt': '%Y-%m-%d %H:%M:%S'
                }
            },
            'handlers': {
                'console': {
                    'class': 'logging.StreamHandler',
                    'level': 'INFO',
                    'formatter': 'simple',
                    'stream': 'ext://sys.stdout'
                }
            },
            'root': {
                'level': 'DEBUG',
                'handlers': ['console']
            }
        }

        logging.config.dictConfig(logger_config)

        # Create the Logger, set its label and logging level.
        self.logger = logging.getLogger(name=self.name)

    def display_parsing_results(self):
        """
        Displays the properly & improperly parsed arguments (if any).

        """
        # Log the parsed flags.
        flags_str = 'Properly parsed command line arguments: \n'
        flags_str += '=' * 80 + '\n'
        for arg in vars(self.flags):
            flags_str += "{}= {} \n".format(arg, getattr(self.flags, arg))
        flags_str += '=' * 80 + '\n'
        self.logger.info(flags_str)

        # Log the unparsed flags if any.
        if self.unparsed:
            flags_str = 'Invalid command line arguments: \n'
            flags_str += '=' * 80 + '\n'
            for arg in self.unparsed:
                flags_str += "{} \n".format(arg)
            flags_str += '=' * 80 + '\n'
            self.logger.warning(flags_str)

    def setup_experiment(self):
        """
        Sets up a specific experiment.

        Base method:

            - Parses command line arguments.

            - Sets the 3 default sections (training / validation / test) and sets their DataLoader params.

        .. note::

            Child classes should override this method, but still call its parent to draw the basic functionality \
            implemented here.


        """
        # Parse arguments.
        self.flags, self.unparsed = self.parser.parse_known_args()

        # Set logger depending on the settings.
        self.logger.setLevel(
            getattr(logging, self.flags.log_level.upper(), None))

        # add empty sections
        self.params.add_default_params(
            {"training": {
                'terminal_conditions': {}
            }})
        self.params.add_default_params({"validation": {}})
        self.params.add_default_params({"testing": {}})

        # set a default configuration section for the DataLoaders
        dataloader_config = {
            'dataloader': {
                'shuffle': True,  # shuffle set by default.
                'batch_sampler': None,
                'num_workers':
                0,  # Do not use multiprocessing by default - for now.
                'pin_memory': False,
                'drop_last': False,
                'timeout': 0
            },
            'sampler': {},  # not using sampler by default
        }

        self.params["training"].add_default_params(dataloader_config)
        self.params["validation"].add_default_params(dataloader_config)
        self.params["testing"].add_default_params(dataloader_config)

    def build_problem_sampler_loader(self, params, section_name):
        """
        Builds and returns the Problem class, alongside its DataLoader.

        Also builds the sampler if required.

        :param params: 'ParamInterface' object, referring to one of main sections (training/validation/testing).
        :type params: miprometheus.utils.ParamInterface

        :param section_name: name of the section that will be used by logger for display.

        :return: Problem instance & DataLoader instance.
        """

        # Build the problem.
        problem = ProblemFactory.build(params['problem'])

        # Try to build the sampler.
        sampler = SamplerFactory.build(problem, params['sampler'])

        if sampler is not None:
            # Set shuffle to False - REQUIRED as those two are exclusive.
            params['dataloader'].add_config_params({'shuffle': False})

        # build the DataLoader on top of the validation problem
        loader = DataLoader(
            dataset=problem,
            batch_size=params['problem']['batch_size'],
            shuffle=params['dataloader']['shuffle'],
            sampler=sampler,
            batch_sampler=params['dataloader']['batch_sampler'],
            num_workers=params['dataloader']['num_workers'],
            collate_fn=problem.collate_fn,
            pin_memory=params['dataloader']['pin_memory'],
            drop_last=params['dataloader']['drop_last'],
            timeout=params['dataloader']['timeout'],
            worker_init_fn=problem.worker_init_fn)

        # Display sizes.
        self.logger.info("Problem for '{}' loaded (size: {})".format(
            section_name, len(problem)))
        if (sampler is not None):
            self.logger.info("Sampler for '{}' created (size: {})".format(
                section_name, len(sampler)))

        # Return sampler - even if it is none :]
        return problem, sampler, loader
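    # Illustrative usage sketch (hypothetical call site, not part of the original code):
    #
    #   problem, sampler, loader = self.build_problem_sampler_loader(
    #       self.params['training'], 'training')
    #   for data_dict in loader:
    #       ...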

    def get_epoch_size(self, problem, sampler, batch_size, drop_last):
        """
        Computes the number of iterations ('episodes') needed to cover the entire dataset once, given the dataset
        size and the batch size.

        Takes into account whether a sampler is being used or not.

        :param problem: Object derived from the ''Problem'' class

        :param sampler: Sampler (may be None)

        :param batch_size: Batch size.
        :type batch_size: int

        :param drop_last: If True then last batch (if incomplete) will not be counted
        :type drop_last: bool

        .. note::

            An incomplete last batch is counted unless ``drop_last`` in ``DataLoader()`` is set to True.

        .. warning::

            This method is kept 'just in case'; in most cases one might simply use ``len(dataloader)``.

        :return: Number of iterations to perform to go through the entire dataset once.

        """
        # "Estimate" dataset size.
        if (sampler is not None):
            problem_size = len(sampler)
        else:
            problem_size = len(problem)

        # If problem_size is a multiple of batch_size OR drop_last is set.
        if (problem_size % batch_size) == 0 or drop_last:
            return problem_size // batch_size
        else:
            return (problem_size // batch_size) + 1
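    # Worked example (illustrative, assuming no sampler): with 1000 samples and a batch
    # size of 64, drop_last=False gives 1000 // 64 + 1 = 16 iterations (15 full batches
    # plus one incomplete batch of 40 samples), while drop_last=True gives 15.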

    def export_experiment_configuration(self, log_dir, filename, user_confirm):
        """
        Dumps the configuration to ``yaml`` file.

        :param log_dir: Directory used to host log files (such as the collected statistics).
        :type log_dir: str

        :param filename: Name of the ``yaml`` file to write to.
        :type filename: str

        :param user_confirm: Whether to request user confirmation.
        :type user_confirm: bool


        """
        # -> At this point, all configuration for experiment is complete.

        # Display results of parsing.
        self.display_parsing_results()

        # Log the resulting training configuration.
        conf_str = 'Final parameter registry configuration:\n'
        conf_str += '=' * 80 + '\n'
        conf_str += yaml.safe_dump(self.params.to_dict(),
                                   default_flow_style=False)
        conf_str += '=' * 80 + '\n'
        self.logger.info(conf_str)

        # Save the resulting configuration into a .yaml settings file, under log_dir
        with open(log_dir + filename, 'w') as yaml_backup_file:
            yaml.dump(self.params.to_dict(),
                      yaml_backup_file,
                      default_flow_style=False)

        # Ask for confirmation - optional.
        if user_confirm:
            try:
                input('Press <Enter> to confirm and start the experiment\n')
            except KeyboardInterrupt:
                exit(0)

    def add_statistics(self, stat_col):
        """
        Adds most elementary shared statistics to ``StatisticsCollector``: episode and loss.

        :param stat_col: ``StatisticsCollector``.

        """
        # Add default statistics with formatting.
        stat_col.add_statistic('loss', '{:12.10f}')
        stat_col.add_statistic('episode', '{:06d}')

    def add_aggregators(self, stat_agg):
        """
        Adds basic statistical aggregators to ``StatisticsAggregator``: episode, \
        episodes_aggregated and loss derivatives.

        :param stat_agg: ``StatisticsAggregator``.

        """
        # add 'aggregators' for the episode.
        stat_agg.add_aggregator('episode', '{:06d}')
        # Number of aggregated episodes.
        stat_agg.add_aggregator('episodes_aggregated', '{:06d}')

        # Add default statistical aggregators for the loss (indicating a formatting).
        # Represents the average loss, but staying with 'loss' for TensorBoard "variable compatibility".
        stat_agg.add_aggregator('loss', '{:12.10f}')
        stat_agg.add_aggregator('loss_min', '{:12.10f}')
        stat_agg.add_aggregator('loss_max', '{:12.10f}')
        stat_agg.add_aggregator('loss_std', '{:12.10f}')

    def aggregate_statistics(self, stat_col, stat_agg):
        """
        Aggregates the default statistics collected by the ``StatisticsCollector``.


        .. note::
            Only computes the min, max, mean, std of the loss as these are the basic statistical aggregators by default.

            Given that the ``StatisticsAggregator`` uses the statistics collected by the ``StatisticsCollector``, \
            it should be ensured that these statistics are correctly collected (i.e. use of ``self.add_statistics()`` \
            and ``collect_statistics()``).

        :param stat_col: ``StatisticsCollector``

        :param stat_agg: ``StatisticsAggregator``

        """
        # By default, copy the last value for all variables that have matching names.
        # (will work well for e.g. episode or epoch)
        for k, v in stat_col.items():
            if k in stat_agg.aggregators:
                # Copy last collected value.
                stat_agg.aggregators[k] = v[-1]

        # Get loss values.
        loss_values = stat_col['loss']

        # Calculate default aggregates.
        stat_agg.aggregators['loss'] = torch.mean(torch.tensor(loss_values))
        stat_agg.aggregators['loss_min'] = min(loss_values)
        stat_agg.aggregators['loss_max'] = max(loss_values)
        stat_agg.aggregators['loss_std'] = 0.0 if len(
            loss_values) <= 1 else torch.std(torch.tensor(loss_values))
        stat_agg.aggregators['episodes_aggregated'] = len(loss_values)

    @abstractmethod
    def run_experiment(self):
        """
        Main function of the worker which executes a specific experiment.

        .. note::

            Abstract. Should be implemented in the subclasses.


        """

    def add_file_handler_to_logger(self, logfile):
        """
        Add a ``logging.FileHandler`` to the logger of the current ``Worker``.

        Specifies a ``logging.Formatter``:

            >>> logging.Formatter(fmt='[%(asctime)s] - %(levelname)s - %(name)s >>> %(message)s',
            >>>                   datefmt='%Y-%m-%d %H:%M:%S')


        :param logfile: File used by the ``FileHandler``.

        """
        # create file handler which logs even DEBUG messages
        fh = logging.FileHandler(logfile)

        # set logging level for this file
        fh.setLevel(logging.DEBUG)

        # create formatter and add it to the handlers
        formatter = logging.Formatter(
            fmt='[%(asctime)s] - %(levelname)s - %(name)s >>> %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S')
        fh.setFormatter(formatter)

        # add the handler to the logger
        self.logger.addHandler(fh)
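    # Illustrative usage sketch (hypothetical path, not part of the original code):
    #
    #   self.add_file_handler_to_logger(os.path.join(log_dir, 'worker.log'))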

    def recurrent_config_parse(self, configs: str, configs_parsed: list):
        """
        Parses names of configuration files in a recursive manner, i.e. \
        by looking for ``default_configs`` sections and trying to load and parse those \
        files one by one.

        :param configs: String containing names of configuration files (with paths), separated by commas.
        :type configs: str

        :param configs_parsed: Configurations that were already parsed (so we won't parse them many times).
        :type configs_parsed: list


        :return: list of parsed configuration files.

        """
        # Split and remove spaces.
        configs_to_parse = configs.replace(" ", "").split(',')

        # Terminal condition.
        while len(configs_to_parse) > 0:

            # Get config.
            config = configs_to_parse.pop(0)

            # Skip empty names (resulting from stray commas).
            if config == '':
                continue
            print("Info: Parsing the {} configuration file".format(config))

            # Check if it was already loaded.
            if config in configs_parsed:
                print(
                    'Warning: Configuration file {} already parsed - skipping'.
                    format(config))
                continue

            # Check if file exists.
            if not os.path.isfile(config):
                print('Error: Configuration file {} does not exist'.format(
                    config))
                exit(-1)

            try:
                # Open file and get parameter dictionary.
                with open(config, 'r') as stream:
                    param_dict = yaml.safe_load(stream)
            except yaml.YAMLError as e:
                print(
                    "Error: Couldn't properly parse the {} configuration file".
                    format(config))
                print('yaml.YAMLERROR:', e)
                exit(-1)

            # Remember that we loaded that config.
            configs_parsed.append(config)

            # Check if there are any default configs to load.
            if 'default_configs' in param_dict:
                # If there are - recursion!
                configs_parsed = self.recurrent_config_parse(
                    param_dict['default_configs'], configs_parsed)

        # Done, return list of loaded configs.
        return configs_parsed
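    # Illustrative sketch (hypothetical file contents, not part of the original code):
    # a configuration file may chain further defaults through a 'default_configs' entry,
    # which this method resolves recursively, e.g.:
    #
    #   default_configs: configs/base_trainer.yaml,configs/base_problem.yaml
    #   training:
    #     problem:
    #       name: SerialRecall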

    def recurrent_config_load(self, configs_to_load):
        for config in reversed(configs_to_load):
            # Load params from YAML file.
            self.params.add_config_params_from_yaml(config)
            print('Loaded configuration from file {}'.format(config))

    def check_and_set_cuda(self, use_gpu):
        """
        Enables computations on CUDA if GPU is available.
        Sets the default data types.

        :param use_gpu: Command line flag indicating whether to use GPU/CUDA or not.

        """
        # Determine if GPU/CUDA is available.
        if torch.cuda.is_available():
            if use_gpu:
                self.app_state.convert_cuda_types()
                self.logger.info(
                    'Running computations on GPU using CUDA enabled')
        elif use_gpu:
            self.logger.warning(
                'GPU flag is enabled but there are no available GPU devices, using CPU instead'
            )
        else:
            self.logger.warning('GPU flag is disabled, using CPU.')

    def predict_evaluate_collect(self,
                                 model,
                                 problem,
                                 data_dict,
                                 stat_col,
                                 episode,
                                 epoch=None):
        """
        Function that performs the following:

            - passes samples through the model,
            - computes loss using the problem
            - collects problem and model statistics,


        :param model: trainable model.
        :type model: ``models.model.Model`` or a subclass

        :param problem: problem generating samples.
        :type problem: ``problems.problem.problem`` or a subclass

        :param data_dict: contains the batch of samples to pass to the model.
        :type data_dict: ``DataDict``

        :param stat_col: statistics collector used for logging accuracy etc.
        :type stat_col: ``StatisticsCollector``

        :param episode: current episode index
        :type episode: int

        :param epoch: current epoch index.
        :type epoch: int, optional


        :return:

            - logits,
            - loss


        """
        # Convert to CUDA.
        if self.app_state.use_CUDA:
            data_dict = data_dict.cuda()

        # Perform forward calculation.
        logits = model(data_dict)

        # Evaluate loss function.
        loss = problem.evaluate_loss(data_dict, logits)

        # Collect "elementary" statistics - episode and loss.
        if ('epoch' in stat_col) and (epoch is not None):
            stat_col['epoch'] = epoch

        stat_col['episode'] = episode
        # Collect loss as float.
        stat_col['loss'] = loss

        # Collect other (potential) statistics from problem & model.
        problem.collect_statistics(stat_col, data_dict, logits)
        model.collect_statistics(stat_col, data_dict, logits)

        # Return tuple: logits, loss.
        return logits, loss
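    # Illustrative usage sketch (hypothetical training loop, not part of the original code):
    #
    #   for episode, data_dict in enumerate(dataloader):
    #       optimizer.zero_grad()
    #       logits, loss = self.predict_evaluate_collect(model, problem, data_dict,
    #                                                    stat_col, episode, epoch)
    #       loss.backward()
    #       optimizer.step()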

    def export_statistics(self, stat_obj, tag='', export_to_log=True):
        """
        Export the statistics/aggregations to logger, csv and TB.

        :param stat_obj: ``StatisticsCollector`` or ``StatisticsAggregator`` object.

        :param tag: Additional tag that will be added to string exported to logger, optional (DEFAULT = '').
        :type tag: str

        :param export_to_log: If True, exports statistics to logger (DEFAULT: True)
        :type export_to_log: bool

        """
        # Log to logger
        if export_to_log:
            self.logger.info(stat_obj.export_to_string(tag))

        # Export to csv
        stat_obj.export_to_csv()

        # Export to TensorBoard.
        stat_obj.export_to_tensorboard()

    def aggregate_and_export_statistics(self,
                                        problem,
                                        model,
                                        stat_col,
                                        stat_agg,
                                        episode,
                                        tag='',
                                        export_to_log=True):
        """
        Aggregates the collected statistics. Exports the aggregations to logger, csv and TB. \
        Empties statistics collector for the next episode.

        :param model: trainable model.
        :type model: ``models.model.Model`` or a subclass

        :param problem: problem generating samples.
        :type problem: ``problems.problem.Problem`` or a subclass

        :param stat_col: ``StatisticsCollector`` object.

        :param stat_agg: ``StatisticsAggregator`` object.

        :param episode: current episode index.
        :type episode: int

        :param tag: Additional tag that will be added to string exported to logger, optional (DEFAULT = '').
        :type tag: str

        :param export_to_log: If True, exports statistics to logger (DEFAULT: True)
        :type export_to_log: bool
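
        Example (a usage sketch; the names follow the parameters documented above, the tag is arbitrary):

            >>> self.aggregate_and_export_statistics(problem, model, stat_col, stat_agg, episode, tag='[Validation]')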

        """
        # Aggregate statistics.
        self.aggregate_statistics(stat_col, stat_agg)
        problem.aggregate_statistics(stat_col, stat_agg)
        model.aggregate_statistics(stat_col, stat_agg)

        # Set episode, so the datapoint will appear in the right place in TB.
        stat_agg["episode"] = episode

        # Export to logger, csv and TB.
        self.export_statistics(stat_agg, tag, export_to_log)

    def cycle(self, iterable):
        """
        Cycle an iterator to prevent its exhaustion.
        This function is used in the (online) trainer to reuse the same ``DataLoader`` when the number of episodes \
        exceeds len(dataset)/batch_size.

        :param iterable: iterable.
        :type iterable: iter
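
        Example (a usage sketch; assumes ``dataloader`` is an existing ``DataLoader``):

            >>> endless_iterator = self.cycle(dataloader)
            >>> batch = next(endless_iterator)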

        """
        while True:
            for x in iterable:
                yield x

    def set_random_seeds(self, params, section_name):
        """
        Set ``torch`` & ``NumPy`` random seeds from the ``ParamRegistry``: \
        if a seed was indicated there, use it; otherwise, set a random one.

        :param params: Section in config/param registry that will be changed \
            (only the "training" or "testing" sections will be taken into account).

        :param section_name: Name of the section (for logging purposes only).
        :type section_name: str
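
        Example (a usage sketch; assumes the parameter registry contains a "training" section):

            >>> self.set_random_seeds(self.params['training'], 'training')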

        """
        # Set the random seeds: either from the loaded configuration or a default randomly selected one.
        params.add_default_params({"seed_numpy": -1})
        if params["seed_numpy"] == -1:
            seed = randrange(0, 2**32)
            # Overwrite the config param!
            params.add_config_params({"seed_numpy": seed})

        self.logger.info("Setting numpy random seed in {} to: {}".format(
            section_name, params["seed_numpy"]))
        np.random.seed(params["seed_numpy"])

        params.add_default_params({"seed_torch": -1})
        if params["seed_torch"] == -1:
            seed = randrange(0, 2**32)
            # Overwrite the config param!
            params.add_config_params({"seed_torch": seed})

        self.logger.info("Setting torch random seed in {} to: {}".format(
            section_name, params["seed_torch"]))
        torch.manual_seed(params["seed_torch"])
        torch.cuda.manual_seed_all(params["seed_torch"])
Example No. 27
        self.plotWindow.update(fig, frames)
        return self.plotWindow.is_closed


if __name__ == "__main__":
    # Set logging level.
    logger = logging.getLogger('NTM-Module')
    logging.basicConfig(level=logging.DEBUG)

    # Set visualization.
    from miprometheus.utils.app_state import AppState
    AppState().visualize = True

    # "Loaded parameters".
    from miprometheus.utils.param_interface import ParamInterface
    params = ParamInterface()
    params.add_default_params({
        # controller parameters
        'controller': {
            'name': 'GRUController',
            'hidden_state_size': 5,
            'num_layers': 1,
            'non_linearity': 'none',
            'output_size': 5
        },
        # interface parameters
        'interface': {
            'num_read_heads': 2,
            'shift_size': 3
        },
        # memory parameters
Example No. 28
    def __init__(self, name, add_default_parser_args=True):
        """
        Base constructor for all workers:

            - Initializes the AppState singleton:

                >>> self.app_state = AppState()

            - Initializes the Parameter Registry:

                >>> self.params = ParamInterface()

            - Defines the logger:

                >>> self.logger = logging.getLogger(name=self.name)

            - Creates parser and adds default worker command line arguments.

        :param name: Name of the worker.
        :type name: str

        :param add_default_parser_args: If set, adds default parser arguments (DEFAULT: True).
        :type add_default_parser_args: bool

        """
        # Call base constructor.
        super(Worker, self).__init__()

        # Set worker name.
        self.name = name

        # Initialize the application state singleton.
        self.app_state = AppState()

        # Initialize parameter interface/registry.
        self.params = ParamInterface()

        # Initialize logger using the configuration.
        self.initialize_logger()

        # Create parser with a list of runtime arguments.
        self.parser = argparse.ArgumentParser(
            formatter_class=argparse.RawTextHelpFormatter)

        # Add arguments to the specific parser.
        if add_default_parser_args:
            # These arguments will be shared by all basic workers.
            self.parser.add_argument(
                '--config',
                dest='config',
                type=str,
                default='',
                help='Name of the configuration file(s) to be loaded. '
                'If specifying more than one file, they must be separated with a comma (",").'
            )

            self.parser.add_argument(
                '--model',
                type=str,
                default='',
                dest='model',
                help='Path to the file containing the saved parameters'
                ' of the model to load (model checkpoint; should end with the .pt extension).'
            )

            self.parser.add_argument(
                '--gpu',
                dest='use_gpu',
                action='store_true',
                help=
                'The current worker will move the computations to GPU devices, if available '
                'in the system. (Default: False)')

            self.parser.add_argument(
                '--expdir',
                dest='expdir',
                type=str,
                default="./experiments",
                help=
                'Path to the directory where the experiment(s) folders are/will be stored.'
                ' (DEFAULT: ./experiments)')

            self.parser.add_argument('--savetag',
                                     dest='savetag',
                                     type=str,
                                     default='',
                                     help='Tag for the save directory.')

            self.parser.add_argument('--ll',
                                     action='store',
                                     dest='log_level',
                                     type=str,
                                     default='INFO',
                                     choices=[
                                         'CRITICAL', 'ERROR', 'WARNING',
                                         'INFO', 'DEBUG', 'NOTSET'
                                     ],
                                     help="Log level. (Default: INFO)")

            self.parser.add_argument(
                '--li',
                dest='logging_interval',
                default=100,
                type=int,
                help=
                'Statistics logging interval. Will impact logging to the logger and '
                'exporting to TensorBoard. Writing to the csv file is not impacted '
                '(interval of 1). (Default: 100, i.e. logs every 100 episodes).'
            )

            self.parser.add_argument(
                '--agree',
                dest='confirm',
                action='store_true',
                help=
                'Request user confirmation just after loading the settings, '
                'before starting training. (Default: False)')
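
        # Usage note (a sketch, not part of the original snippet): a derived worker would
        # typically parse these arguments later in its setup, e.g.:
        #   self.flags, self.unparsed = self.parser.parse_known_args()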
Example No. 29
        # Plot!
        plt.show()
        exit()


if __name__ == '__main__':
    """ Tests CNN_LSTM on SortOfCLEVR"""

    # "Loaded parameters".
    from miprometheus.utils.param_interface import ParamInterface
    from miprometheus.utils.app_state import AppState
    app_state = AppState()
    app_state.visualize = True
    from miprometheus.problems.image_text_to_class.sort_of_clevr import SortOfCLEVR
    problem_params = ParamInterface()
    problem_params.add_config_params({
        'data_folder': '~/data/sort-of-clevr/',
        'split': 'train',
        'regenerate': False,
        'dataset_size': 10000,
        'img_size': 128
    })

    # create problem
    sortofclevr = SortOfCLEVR(problem_params)

    batch_size = 64

    # wrap DataLoader on top of this Dataset subclass
    from torch.utils.data import DataLoader
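
    # Continuation sketch (the original listing is truncated here); wrapping the problem in a
    # DataLoader, mirroring the other test scripts, could look like:
    #   dataloader = DataLoader(dataset=sortofclevr, collate_fn=sortofclevr.collate_fn,
    #                           batch_size=batch_size, shuffle=True, num_workers=0)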
Example No. 30
                   aspect='auto')

        # Plot!
        plt.show()


if __name__ == '__main__':
    """ Tests StackedAttentionNetwork on SortOfCLEVR"""

    # "Loaded parameters".
    from miprometheus.utils.param_interface import ParamInterface
    from miprometheus.utils.app_state import AppState
    app_state = AppState()
    app_state.visualize = True
    from miprometheus.problems.image_text_to_class.sort_of_clevr import SortOfCLEVR
    problem_params = ParamInterface()
    problem_params.add_config_params({
        'data_folder': '~/data/sort-of-clevr/',
        'split': 'train',
        'regenerate': False,
        'dataset_size': 10000,
        'img_size': 128
    })

    # create problem
    sortofclevr = SortOfCLEVR(problem_params)

    batch_size = 64

    # wrap DataLoader on top of this Dataset subclass
    from torch.utils.data.dataloader import DataLoader