Esempio n. 1
0
    def __init__(self, task: Task = None, projectName: str = None, taskName: str = None,
            additionalLoggingValuesDict=None):
        """
        Bind this instance to a trains task, creating a new task when none is passed in.

        :param task: instance of trains.Task to use; if given, projectName and taskName are ignored
            (a warning is logged for each of them that is not None)
        :param projectName: project of the task to create; only necessary if task is not provided
        :param taskName: name of the task to create; only necessary if task is not provided
        :param additionalLoggingValuesDict: passed through unchanged to the parent constructor
        :raises ValueError: if task is None and projectName or taskName is None
        """
        if task is not None:
            # An existing task takes precedence; point out every argument that is being ignored.
            if projectName is not None:
                log.warning(
                    f"projectName parameter with value {projectName} passed even though task has been given, "
                    f"will ignore this parameter"
                )
            if taskName is not None:
                log.warning(
                    f"taskName parameter with value {taskName} passed even though task has been given, "
                    f"will ignore this parameter"
                )
            self.task = task
        elif projectName is not None and taskName is not None:
            # No task supplied: create one from the two names.
            self.task = Task.init(project_name=projectName, task_name=taskName, reuse_last_task_id=False)
        else:
            raise ValueError("Either the trains task or the project name and task name have to be provided")

        self.logger = self.task.get_logger()
        super().__init__(additionalLoggingValuesDict=additionalLoggingValuesDict)
Esempio n. 2
0
def main():
    """Create the "text reporting" example task and send console and text logs to it."""
    # Start the experiment Task in the "examples" project
    task = Task.init(project_name="examples", task_name="text reporting")

    print('reporting text logs')

    # one line on stdout ...
    print('This is standard output test')

    # ... and one line on stderr
    print('This is standard error test', file=sys.stderr)

    # The logger is obtained from the task object;
    # Task.current_task().get_logger() gives the same access from anywhere in the code.
    text_logger = task.get_logger()

    # hand the logger to the text reporting helper
    report_logs(text_logger)

    # Explicit flush: without it, reports are flushed in the background every couple of
    # seconds, and at the end of the process execution
    text_logger.flush()

    print('We are done reporting, have a great day :)')
Esempio n. 3
0
 def __init__(
         self,
         project_name: Optional[str] = None,
         task_name: Optional[str] = None,
         task_type: str = 'training',
         reuse_last_task_id: bool = True,
         output_uri: Optional[str] = None,
         auto_connect_arg_parser: bool = True,
         auto_connect_frameworks: bool = True,
         auto_resource_monitoring: bool = True
 ) -> None:
     """Create the underlying trains Task unless the instance is in bypass mode.

     All keyword parameters are forwarded unchanged to ``Task.init``. When
     ``self._bypass`` is truthy no task is created and ``self._trains`` is ``None``.
     """
     super().__init__()
     if self._bypass:
         self._trains = None
         return

     # Collect the Task.init options once, then create the task.
     init_options = dict(
         project_name=project_name,
         task_name=task_name,
         task_type=task_type,
         reuse_last_task_id=reuse_last_task_id,
         output_uri=output_uri,
         auto_connect_arg_parser=auto_connect_arg_parser,
         auto_connect_frameworks=auto_connect_frameworks,
         auto_resource_monitoring=auto_resource_monitoring,
     )
     self._trains = Task.init(**init_options)
Esempio n. 4
0
    def __init__(
            self,
            pool_frequency=0.2,  # type: float
            default_execution_queue=None,  # type: Optional[str]
            pipeline_time_limit=None,  # type: Optional[float]
            auto_connect_task=True,  # type: Union[bool, Task]
            always_create_task=False,  # type: bool
            add_pipeline_tags=False,  # type: bool
    ):
        # type: (...) -> ()
        """
        Set up a new pipeline controller, the object that launches and monitors the pipeline experiments.

        :param float pool_frequency: Polling frequency, in minutes, used when monitoring
            experiments / states.
        :param str default_execution_queue: Execution queue used when a step does not provide one.
        :param float pipeline_time_limit: Maximum time, in minutes, for the entire pipeline process.
            ``None`` (the default) or any other falsy value means no time limit.
        :param bool auto_connect_task: Store pipeline arguments and configuration in the Task

            - ``True`` - The pipeline argument and configuration will be stored in the current Task. All
              arguments will be under the hyper-parameter section ``Pipeline``, and the pipeline DAG will be
              stored as a Task configuration object named ``Pipeline``.
            - ``False`` - Do not store with Task.
            - ``Task`` - A specific Task object to connect the pipeline with.
        :param bool always_create_task: What to do when no Task is available

            - ``True`` - If neither a Task object was passed nor a current Task exists, create a new
              controller Task named ``Pipeline <current datetime>`` in the ``Pipelines`` project.
            - ``False`` - Only use the passed Task or :py:meth:`task.Task.current_task` (if one exists).
        :param bool add_pipeline_tags: (default: False) if True, add `pipe: <pipeline_task_id>` tag to all
            steps (Tasks) created by this pipeline.
        """
        # graph and execution bookkeeping
        self._nodes = {}
        self._running_nodes = []
        self._start_time = None
        self._thread = None
        self._stop_event = None
        self._experiment_created_cb = None

        # both durations arrive in minutes and are stored multiplied by 60
        if pipeline_time_limit:
            self._pipeline_time_limit = pipeline_time_limit * 60.
        else:
            self._pipeline_time_limit = None
        self._pool_frequency = pool_frequency * 60.

        self._default_execution_queue = default_execution_queue
        self._add_pipeline_tags = add_pipeline_tags

        # an explicitly passed Task wins, otherwise fall back to the current task (may be None)
        if isinstance(auto_connect_task, Task):
            self._task = auto_connect_task
        else:
            self._task = Task.current_task()
        self._step_ref_pattern = re.compile(self._step_pattern)

        if always_create_task and not self._task:
            self._task = Task.init(
                project_name='Pipelines',
                task_name='Pipeline {}'.format(datetime.now()),
                task_type=Task.TaskTypes.controller,
            )

        # with a task at hand: add the pipeline tag to it and record the auto-connect flag
        # (note: _auto_connect_task is only assigned on this path)
        if self._task:
            self._task.add_tags([self._tag])
            self._auto_connect_task = bool(auto_connect_task)
Esempio n. 5
0
def setup_trains_logging(config):
    """Create a trains Task for the training run and attach ``config`` to it.

    Does nothing when ``config["with_trains"]`` is falsy. Otherwise a task named
    ``config["task_name"]`` is initialised, ``config`` is connected as the task
    configuration, and every key/value pair of ``config`` is connected again as
    hyper parameters.
    """
    if not config["with_trains"]:
        return

    # imported lazily so trains is only required when tracking is enabled
    from trains import Task

    task = Task.init("Carbon Black Semantic Segmentation Training",
                     config["task_name"])
    task.connect_configuration(config)

    # Log hyper parameters
    task.connect({name: config[name] for name in list(config.keys())})
Esempio n. 6
0
def main():
    """Train the MNIST example network and report the run through tensorboardX.

    Parses the command line options (batch sizes, epochs, SGD learning rate and
    momentum, CUDA switch, seed, logging interval), initialises the trains Task,
    trains for ``--epochs`` epochs saving the model to the temp directory after
    each one, and runs the test pass once after the last epoch.
    """
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    # help text now matches the actual default
    parser.add_argument('--epochs', type=int, default=2, metavar='N',
                        help='number of epochs to train (default: 2)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    args = parser.parse_args()
    args.cuda = not args.no_cuda and torch.cuda.is_available()

    # The task object itself is not used below; the call is what starts the experiment.
    task = Task.init(project_name='examples', task_name='pytorch with tensorboardX')
    writer = SummaryWriter('runs')
    writer.add_text('TEXT', 'This is some text', 0)

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    # extra DataLoader options are only passed when running on CUDA
    kwargs = {'num_workers': 4, 'pin_memory': True} if args.cuda else {}
    # the same preprocessing pipeline is applied to the train and test splits
    mnist_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))])
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True, transform=mnist_transform),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    # the test loader honours --test-batch-size (previously parsed but never used)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False, transform=mnist_transform),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)

    model = Net()
    if args.cuda:
        model.cuda()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

    for epoch in range(1, args.epochs + 1):
        train(model, epoch, train_loader, args, optimizer, writer)
        # keep a copy of the model after every epoch
        torch.save(model, os.path.join(gettempdir(), 'model{}'.format(epoch)))
    # single test pass once training has finished
    test(model, test_loader, args, optimizer, writer)
Esempio n. 7
0
def TrainModel(model, base_model, model_name):
    """Train ``model`` in two phases and print test/validation scores after each.

    Phase one compiles the model with the default Adam optimizer and fits it
    for 20 epochs. ``base_model`` is then marked trainable, the model is
    recompiled with Adam at a learning rate of 1e-5 and fitted for 10 more
    epochs. The run is tracked by a trains Task named ``model_name`` in the
    "Ex3ModelTrains" project, and a ``TrainsReporter`` is passed to both fits
    as a callback.

    The datasets are read from the module-level names ``train_ds``,
    ``valid_ds`` and ``test_ds``. Each score is now evaluated once per phase
    (the test set was previously evaluated twice with the first result
    discarded).

    :param model: the Keras model to train
    :param base_model: the model whose ``trainable`` flag is switched on between the phases
    :param model_name: task name used for the trains Task
    """

    def fit(epochs):
        # both phases share everything except the number of epochs
        model.fit(train_ds,
                  steps_per_epoch=train_ds.samples / train_ds.batch_size,
                  epochs=epochs,
                  validation_data=valid_ds,
                  validation_steps=valid_ds.samples / valid_ds.batch_size,
                  callbacks=[reporter],
                  verbose=1)

    def report_scores():
        # one evaluation per dataset; the result goes straight to the console
        print('Test evaluation Score:', model.evaluate(test_ds))
        print('validation evaluation Score:', model.evaluate(valid_ds))

    # The task object is not used further; the call starts the experiment tracking.
    task = Task.init(project_name="Ex3ModelTrains", task_name=model_name)
    reporter = TrainsReporter()
    # Show a summary of the model. Check the number of trainable parameters
    model.summary()

    # Phase 1: compile and train
    model.compile(loss=keras.losses.BinaryCrossentropy(from_logits=True),
                  optimizer=keras.optimizers.Adam(),
                  metrics=[metrics.BinaryAccuracy()])
    fit(20)

    # Unfreeze the base_model. Per the original author's note it keeps running in
    # inference mode because `training=False` was passed when calling it, so the
    # batchnorm layers do not update their batch statistics and cannot undo the
    # training done so far.
    base_model.trainable = True
    # the reporter is told that 20 epochs have already been run
    reporter.epoch_ref = 20

    report_scores()

    # Phase 2: recompile with a low learning rate and continue training
    model.compile(
        optimizer=keras.optimizers.Adam(1e-5),  # Low learning rate
        loss=keras.losses.BinaryCrossentropy(from_logits=True),
        metrics=[keras.metrics.BinaryAccuracy()],
    )
    fit(10)

    report_scores()
Esempio n. 8
0
def main():
    """Configure the AWS auto-scaler, register it as a trains Task and optionally start it.

    When running remotely the settings come from the ``AwsAutoScaler`` defaults.
    Locally they are either loaded from the configuration file (after asking
    the user) or produced by the wizard and written back to that file; if the
    file cannot be written an error is printed and the function returns.
    The auto-scaler is started when running remotely or when ``--run`` is given.
    """
    parser = ArgumentParser()
    parser.add_argument(
        "--run",
        help="Run the autoscaler after wizard finished",
        action="store_true",
        default=False,
    )
    args = parser.parse_args()

    if running_remotely():
        hyper_params = AwsAutoScaler.Settings().as_dict()
        configurations = AwsAutoScaler.Configuration().as_dict()
    else:
        print("AWS Autoscaler setup\n")

        config_file = Path(CONF_FILE).absolute()
        # only ask about loading when there is a file to load
        load_saved = config_file.exists() and input_bool(
            "Load configurations from config file '{}' [Y/n]? ".format(
                str(CONF_FILE)),
            default=True,
        )
        if load_saved:
            with config_file.open("r") as stream:
                saved = yaml.load(stream, Loader=yaml.SafeLoader)
            hyper_params, configurations = saved["hyper_params"], saved["configurations"]
        else:
            configurations, hyper_params = run_wizard()

            # persist the wizard answers for the next run
            to_save = {
                "hyper_params": hyper_params,
                "configurations": configurations,
            }
            try:
                with config_file.open("w+") as stream:
                    yaml.safe_dump(to_save, stream)
            except Exception:
                print(
                    "Error! Could not write configuration file at: {}".format(
                        str(CONF_FILE)))
                return

    task = Task.init(project_name="Auto-Scaler", task_name="AWS Auto-Scaler")
    task.connect(hyper_params)
    task.connect_configuration(configurations)

    autoscaler = AwsAutoScaler(hyper_params, configurations)

    if running_remotely() or args.run:
        autoscaler.start()
Esempio n. 9
0
    def __init__(self, *_, **kwargs):
        """Create (or stub out) the trains Task and the helpers built on its logger.

        Positional arguments are ignored. ``project_name``, ``task_name`` and
        ``task_type`` are read from ``kwargs`` (``task_type`` defaults to
        ``Task.TaskTypes.training``); every other keyword argument is forwarded
        to ``Task.init`` as is. In bypass mode a warning is issued and a stub
        object replaces the task.

        :raises RuntimeError: if trains cannot be imported
        """
        try:
            from trains import Task
            from trains.binding.frameworks.tensorflow_bind import WeightsGradientHistHelper
        except ImportError:
            raise RuntimeError(
                "This contrib module requires trains to be installed. "
                "You may install trains using: \n pip install trains \n")

        # keyword arguments that are passed to Task.init explicitly below
        explicit_keys = ("project_name", "task_name", "task_type")
        experiment_kwargs = {
            key: value for key, value in kwargs.items() if key not in explicit_keys
        }

        if not self.bypass_mode():
            self._task = Task.init(
                project_name=kwargs.get("project_name"),
                task_name=kwargs.get("task_name"),
                task_type=kwargs.get("task_type", Task.TaskTypes.training),
                **experiment_kwargs,
            )
        else:
            warnings.warn("TrainsSaver: running in bypass mode")

            class _Stub(object):
                # Stand-in for the task: calls and attribute lookups return the stub itself,
                # "name" and "id" are empty strings, attribute assignments are dropped.
                def __call__(self, *_args, **_kwargs):
                    return self

                def __getattr__(self, name):
                    return "" if name in ("name", "id") else self

                def __setattr__(self, name, value):
                    pass

            self._task = _Stub()

        self.trains_logger = self._task.get_logger()

        self.grad_helper = WeightsGradientHistHelper(
            logger=self.trains_logger, )
Esempio n. 10
0
def run(config, logger=None, local_rank=0, **kwargs):
    """Run the segmentation training inside an NCCL process group with trains tracking.

    Initialises the trains Task and the process group, sets up and validates
    the configuration, records the configuration file and script on the task,
    and calls ``training``. The process group is destroyed whether training
    finishes, is interrupted by the user, or fails.

    :param config: configuration object exposing ``setup()``; after setup it must carry
        ``config_filepath`` and ``script_filepath`` as ``Path`` objects
    :param logger: logger used to report an interrupt or a failure; when ``None`` a
        module logger is used, so that a failure is never masked by an
        ``AttributeError`` on ``None``
    :param local_rank: forwarded to ``training``
    :param kwargs: accepted and ignored
    :raises AssertionError: if CUDA or cudnn is unavailable, or the configuration is incomplete
    :raises Exception: any exception raised by ``training`` is logged and re-raised
    """
    import logging

    if logger is None:
        logger = logging.getLogger(__name__)

    assert torch.cuda.is_available()
    assert torch.backends.cudnn.enabled, "Nvidia/Amp requires cudnn backend to be enabled."

    task = Task.init(
        "ignite", "DeeplabV3_ResNet101 pascal_voc2012 segmentation example")

    dist.init_process_group("nccl", init_method="env://")

    # As we passed config with option --manual_config_load
    assert hasattr(config, "setup"), (
        "We need to manually setup the configuration, please set --manual_config_load "
        "to py_config_runner")

    config = config.setup()

    assert_config(config, TRAINVAL_CONFIG)
    # The following attributes are automatically added by py_config_runner
    assert hasattr(config, "config_filepath") and isinstance(
        config.config_filepath, Path)
    assert hasattr(config, "script_filepath") and isinstance(
        config.script_filepath, Path)

    # dump python files to reproduce the run
    task.connect_configuration(config.config_filepath.as_posix())
    task.upload_artifact("script", config.script_filepath)

    config.output_path = Path("./artifacts")

    # log the configuration, if we are the master node
    if dist.get_rank() == 0:
        task.connect(get_params(config, TRAINVAL_CONFIG))

    try:
        training(config, local_rank=local_rank, with_trains_logging=True)
    except KeyboardInterrupt:
        logger.info("Caught KeyboardInterrupt -> exit")
    except Exception:  # noqa
        logger.exception("")
        raise
    finally:
        # single teardown point for the success, interrupt and failure paths
        dist.destroy_process_group()
Esempio n. 11
0
def main():
    """Create the "scalar reporting" example task and send scalar graphs to it."""
    # Start the experiment Task in the "examples" project
    task = Task.init(project_name="examples", task_name="scalar reporting")

    print('reporting scalar graphs')

    # The logger is obtained from the task object;
    # Task.current_task().get_logger() gives the same access from anywhere in the code.
    scalar_logger = task.get_logger()

    # hand the logger to the scalar reporting helper
    report_scalars(scalar_logger)

    # Explicit flush: without it, reports are flushed in the background every couple of
    # seconds, and at the end of the process execution
    scalar_logger.flush()

    print('We are done reporting, have a great day :)')
Esempio n. 12
0
def run(config, **kwargs):
    """Entry point of the training script.

    The script is launched with `py_config_runner`, which requires a
    `run(config, **kwargs)` function. Training is executed through an
    `idist.Parallel` context using the "nccl" backend; extra keyword
    arguments are accepted and ignored.
    """

    assert torch.cuda.is_available(), torch.cuda.is_available()
    assert torch.backends.cudnn.enabled, "Nvidia/Amp requires cudnn backend to be enabled."

    with idist.Parallel(backend="nccl") as parallel:

        logger = setup_logger(name="Pascal-VOC12 Training",
                              distributed_rank=idist.get_rank())

        assert_config(config, TRAINVAL_CONFIG)
        # py_config_runner adds both file path attributes automatically
        assert hasattr(config, "config_filepath") and isinstance(
            config.config_filepath, Path)
        assert hasattr(config, "script_filepath") and isinstance(
            config.script_filepath, Path)

        # only rank 0 creates the trains task, and only when trains tracking is available
        if idist.get_rank() == 0:
            if exp_tracking.has_trains:
                from trains import Task

                trains_task = Task.init("Pascal-VOC12 Training",
                                        config.config_filepath.stem)
                trains_task.connect_configuration(
                    config.config_filepath.as_posix())

        log_basic_info(logger, config)

        config.output_path = Path(exp_tracking.get_output_path())
        # store the config file, the script and the parameters so the run can be reproduced
        for tracked_file in (config.config_filepath, config.script_filepath):
            exp_tracking.log_artifact(tracked_file.as_posix())
        exp_tracking.log_params(get_params(config, TRAINVAL_CONFIG))

        try:
            parallel.run(training, config, logger=logger)
        except KeyboardInterrupt:
            logger.info("Catched KeyboardInterrupt -> exit")
        except Exception as e:  # noqa
            # log the traceback, then let the failure propagate
            logger.exception("")
            raise e
Esempio n. 13
0
 def trains(self,
            x: data_type,
            y: data_type = None,
            x_cv: data_type = None,
            y_cv: data_type = None,
            *,
            trains_config: Dict[str, Any] = None,
            keep_task_open: bool = False,
            queue: str = None) -> "Wrapper":
     """Fit the model while tracking the run with trains, or enqueue it for remote execution.

     :param x: training features, passed to ``fit`` / ``_before_loop``
     :param y: training labels
     :param x_cv: validation features
     :param y_cv: validation labels
     :param trains_config: keyword arguments for ``Task.init``; when ``None`` the call is
         a plain ``self.fit(x, y, x_cv, y_cv)`` without any tracking
     :param keep_task_open: if False (default) the task is closed and the module-level
         ``trains_logger`` is reset to ``None`` once the loop has finished
     :param queue: when given, the task found by the ``project_name`` / ``task_name``
         entries of ``trains_config`` is cloned, the clone receives the current
         configuration and is enqueued on this queue; no training happens locally
     :return: the value returned by ``self.fit`` when ``trains_config`` is None,
         otherwise ``self``
     """
     global trains_logger
     # no trains configuration -> nothing to track
     if trains_config is None:
         return self.fit(x, y, x_cv, y_cv)
     # init trains: start a local task, or clone an existing one for the queue
     if queue is None:
         task = Task.init(**trains_config)
         cloned_task = None
     else:
         project_name = trains_config.get("project_name")
         task_name = trains_config.get("task_name")
         task = Task.get_task(project_name=project_name,
                              task_name=task_name)
         cloned_task = Task.clone(source_task=task, parent=task.id)
     # before loop
     self._verbose_level = 6
     self._data_config["verbose_level"] = 6
     self._before_loop(x, y, x_cv, y_cv)
     self.pipeline.use_tqdm = False
     copied_config = shallow_copy_dict(self.config)
     if queue is not None:
         # remote execution: push the configuration to the clone and enqueue it
         cloned_task.set_parameters(copied_config)
         Task.enqueue(cloned_task.id, queue)
         return self
     # loop
     task.connect(copied_config)
     trains_logger = task.get_logger()
     self._loop()
     if not keep_task_open:
         task.close()
         trains_logger = None
     return self
Esempio n. 14
0
    def __init__(self,
                 project_name: Optional[str] = None,
                 task_name: Optional[str] = None,
                 task_type: str = 'training',
                 reuse_last_task_id: bool = True,
                 output_uri: Optional[str] = None,
                 auto_connect_arg_parser: bool = True,
                 auto_connect_frameworks: bool = True,
                 auto_resource_monitoring: bool = True) -> None:
        """Create the underlying trains Task, or a stub when running in bypass mode.

        All keyword parameters are forwarded unchanged to ``Task.init``. In
        bypass mode two notices are printed and ``self._trains`` is a stub
        object instead of a real task.

        :raises ImportError: if trains is not available (``_TRAINS_AVAILABLE`` is falsy)
        """
        if not _TRAINS_AVAILABLE:
            # the message now names the package that is actually missing
            raise ImportError(
                'You want to use `trains` logger which is not installed yet,'
                ' install it with `pip install trains`.')
        super().__init__()
        if self.bypass_mode():
            print('TRAINS Task: running in bypass mode')
            print('TRAINS results page: disabled')

            class _TaskStub(object):
                # Stand-in for the task: calls and attribute lookups return the stub itself,
                # 'name' and 'id' are empty strings, attribute assignments are dropped.
                def __call__(self, *args, **kwargs):
                    return self

                def __getattr__(self, attr):
                    if attr in ('name', 'id'):
                        return ''
                    return self

                def __setattr__(self, attr, val):
                    pass

            self._trains = _TaskStub()
        else:
            self._trains = Task.init(
                project_name=project_name,
                task_name=task_name,
                task_type=task_type,
                reuse_last_task_id=reuse_last_task_id,
                output_uri=output_uri,
                auto_connect_arg_parser=auto_connect_arg_parser,
                auto_connect_frameworks=auto_connect_frameworks,
                auto_resource_monitoring=auto_resource_monitoring)
Esempio n. 15
0
    def __init__(
            self,
            project_name: Optional[str] = None,
            task_name: Optional[str] = None,
            task_type: str = 'training',
            reuse_last_task_id: bool = True,
            output_uri: Optional[str] = None,
            auto_connect_arg_parser: bool = True,
            auto_connect_frameworks: bool = True,
            auto_resource_monitoring: bool = True
    ) -> None:
        """Create the underlying trains Task, or a stub when running in bypass mode.

        All keyword parameters are forwarded unchanged to ``Task.init``. In
        bypass mode two notices are printed and ``self._trains`` ends up as a
        stub object instead of a real task.
        """
        super().__init__()
        if not self.bypass_mode():
            self._trains = Task.init(
                project_name=project_name,
                task_name=task_name,
                task_type=task_type,
                reuse_last_task_id=reuse_last_task_id,
                output_uri=output_uri,
                auto_connect_arg_parser=auto_connect_arg_parser,
                auto_connect_frameworks=auto_connect_frameworks,
                auto_resource_monitoring=auto_resource_monitoring
            )
            return

        # placeholder; replaced by the stub instance at the end of this branch
        self._trains = None
        print('TRAINS Task: running in bypass mode')
        print('TRAINS results page: disabled')

        class _TaskStub(object):
            # Stand-in for the task: calls and attribute lookups return the stub itself,
            # 'name' and 'id' are empty strings, attribute assignments are dropped.
            def __call__(self, *call_args, **call_kwargs):
                return self

            def __getattr__(self, name):
                return '' if name in ('name', 'id') else self

            def __setattr__(self, name, value):
                pass

        self._trains = _TaskStub()
Esempio n. 16
0
def initialize_trains(arg_parser, project_name, tag):
    """Start trains tracking on the root node and return the tensorboard log directory.

    ``OPTS.trains_task`` is always reset to ``None`` first. Only when this is
    the root node and ``OPTS.tensorboard`` is set, a task is initialised (a
    ``SystemError`` raised while doing so is printed and otherwise ignored),
    stored in ``OPTS.trains_task``, and the ``tensorboard`` directory under
    ``OPTS.root`` is created if it does not exist.

    :param arg_parser: object connected to the task via ``task.connect``
    :param project_name: project name for the task
    :param tag: used as the task name
    :return: path of the tensorboard directory, or ``None`` when tracking is not set up
    """
    OPTS.trains_task = None
    if not (is_root_node() and OPTS.tensorboard):
        return None

    try:
        from trains import Task
        # model backups are sent below the user's home directory
        backup_uri = "{}/data/model_backups".format(os.getenv("HOME"))
        task = Task.init(project_name=project_name,
                         task_name=tag,
                         auto_connect_arg_parser=False,
                         output_uri=backup_uri)
        task.connect(arg_parser)
        task.set_random_seed(OPTS.seed)
        OPTS.trains_task = task
    except SystemError as e:
        print(e)

    tb_logdir = os.path.join(OPTS.root, "tensorboard")
    if not os.path.exists(tb_logdir):
        os.mkdir(tb_logdir)
    return tb_logdir
Esempio n. 17
0
def main():
    """Create the "html samples reporting" example task and report html debug samples to it."""
    # Start the experiment Task in the "examples" project
    task = Task.init(project_name="examples",
                     task_name="html samples reporting")

    print('reporting html files into debug samples section')

    # The logger is obtained from the task object;
    # Task.current_task().get_logger() gives the same access from anywhere in the code.
    html_logger = task.get_logger()

    # each helper reports html as debug samples through the same logger
    report_html_image(html_logger)
    report_html_graph(html_logger)
    report_html_groupby(html_logger)
    report_html_periodic_table(html_logger)
    report_html_url(html_logger)

    # Explicit flush: without it, reports are flushed in the background every couple of
    # seconds, and at the end of the process execution
    html_logger.flush()

    print('We are done reporting, have a great day :)')
Esempio n. 18
0
 def __init__(self,
              project_name: Optional[str] = None,
              task_name: Optional[str] = None,
              task_type: str = 'training',
              reuse_last_task_id: bool = True,
              output_uri: Optional[str] = None,
              auto_connect_arg_parser: bool = True,
              auto_connect_frameworks: bool = True,
              auto_resource_monitoring: bool = True) -> None:
     """Create the underlying trains Task unless running in bypass mode.

     All keyword parameters are forwarded unchanged to ``Task.init``. In
     bypass mode two notices are printed and ``self._trains`` is ``None``.
     """
     super().__init__()
     if self.bypass_mode():  # pragma: no-cover
         # no task is created in bypass mode
         print('TRAINS Task: running in bypass mode')
         print('TRAINS results page: disabled')
         self._trains = None
     else:
         # collect the Task.init options once, then create the task
         init_options = dict(
             project_name=project_name,
             task_name=task_name,
             task_type=task_type,
             reuse_last_task_id=reuse_last_task_id,
             output_uri=output_uri,
             auto_connect_arg_parser=auto_connect_arg_parser,
             auto_connect_frameworks=auto_connect_frameworks,
             auto_resource_monitoring=auto_resource_monitoring,
         )
         self._trains = Task.init(**init_options)
Esempio n. 19
0
from time import sleep

import pandas as pd
import numpy as np
from PIL import Image
from trains import Task

# Start the experiment: project 'examples', task 'artifacts toy'
task = Task.init('examples', 'artifacts toy')

# Small example table: one row per animal, three integer columns
df = pd.DataFrame(
    {
        'num_legs': [2, 4, 8, 0],
        'num_wings': [2, 0, 0, 0],
        'num_specimen_seen': [10, 2, 1, 8]
    },
    index=['falcon', 'dog', 'spider', 'fish'])

# Register Pandas object as artifact to watch
# (it will be monitored in the background and automatically synced and uploaded)
task.register_artifact('train',
                       df,
                       metadata={
                           'counting': 'legs',
                           'max legs': 69
                       })
# change the artifact object
# NOTE(review): DataFrame.sample() returns a new DataFrame and the result is discarded
# here, so `df` itself is not modified by this call - confirm whether an in-place
# change was intended.
df.sample(frac=0.5, replace=True, random_state=1)
# or access it from anywhere using the Task's get_registered_artifacts()
# (the sampled result is discarded here as well)
Task.current_task().get_registered_artifacts()['train'].sample(frac=0.5,
                                                               replace=True,
                                                               random_state=1)
Esempio n. 20
0
import os
import socket
import subprocess
import sys
from copy import deepcopy
from tempfile import mkstemp

import psutil

# make sure we have jupyter in the auto requirements
import jupyter  # noqa
from trains import Task


# initialize TRAINS: register this script as a service-type task in the "DevOps" project
task = Task.init(
    project_name="DevOps", task_name="Allocate Jupyter Notebook Instance", task_type=Task.TaskTypes.service)

# Names of the TRAINS_* environment variables to keep.
# Presumably the remaining runtime TRAINS_* variables are removed further down,
# in code not shown here - TODO confirm.
preserve = (
    "TRAINS_API_HOST",
    "TRAINS_WEB_HOST",
    "TRAINS_FILES_HOST",
    "TRAINS_CONFIG_FILE",
    "TRAINS_API_ACCESS_KEY",
    "TRAINS_API_SECRET_KEY",
    "TRAINS_API_HOST_VERIFY_CERT",
    "TRAINS_DOCKER_IMAGE",
)

# setup os environment: work on a copy of the current process environment
# NOTE(review): deepcopy(os.environ) yields another os._Environ object rather than a
# plain dict, so writes to it may still reach the real process environment; confirm
# this is intended (os.environ.copy() returns a plain dict).
env = deepcopy(os.environ)
Esempio n. 21
0
    gpu_num = hvd.size()
else:
    part_index = 0
    part_num = 1
    gpu_num = 1

# Tensorboard Logging
# Only the root node sets up experiment tracking and a tensorboard directory.
tb_logdir = None
OPTS.trains_task = None
if is_root_node():
    print("Running on {} GPUs".format(gpu_num))
    if OPTS.tensorboard:
        # Tracking with trains is best effort: a failure is reported and the run goes on
        # without a task. Only Exception is caught, so Ctrl-C and interpreter exit are
        # no longer swallowed.
        try:
            from trains import Task
            task = Task.init(project_name="lanmt2",
                             task_name=OPTS.result_tag,
                             auto_connect_arg_parser=False,
                             output_uri=OPTS.root)
            task.connect(ap)
            task.set_random_seed(OPTS.seed)
            task.set_output_model_id(OPTS.model_tag)
            OPTS.trains_task = task
        except Exception as e:
            print("Could not initialize trains task: {}".format(e))
        if envswitch.who() != "shu":
            # directory name encodes model type, latent dimension, noise and learning rate
            tb_str = "{}_lat{}_noise{}_lr{}".format(OPTS.modeltype,
                                                    OPTS.latentdim, OPTS.noise,
                                                    OPTS.ebm_lr)
            if OPTS.train_sgd_steps > 0:
                tb_str += "_imit{}".format(OPTS.train_sgd_steps)
            tb_logdir = os.path.join(HOME_DIR, "tensorboard", "ebm",
                                     "{}_cassio".format(OPTS.dtok), tb_str)
def _load_or_compute_baseline_values(env, eval_seeds, results_filename):
    """
    Return the baseline evaluation results, computing and caching them on first use.

    If ``results_filename`` does not exist, the distance-proportional policy and the
    OR-Tools policy are each evaluated on ``env`` for the given seeds (5 samples per
    seed) and the results are written to that file as JSON. Otherwise the cached
    file is loaded and its per-baseline dictionaries are re-keyed by ``int`` seed.

    :param env: environment handed to ``evaluate_policy_simple``
    :param eval_seeds: seeds the baselines are evaluated on
    :param results_filename: path (string) of the JSON cache file
    :return: dictionary with one entry per baseline name
    """
    baseline_results_path = Path(results_filename)
    # Constructed unconditionally, exactly as before the extraction
    or_tools_policy = ORToolsPolicy(timeout=10)
    if not baseline_results_path.exists():
        baseline_values = {
            'distance': evaluate_policy_simple(env,
                                               eval_seeds,
                                               distance_proportional_policy,
                                               samples_per_seed=5),
            'ORTools': evaluate_policy_simple(env,
                                              eval_seeds,
                                              or_tools_policy,
                                              samples_per_seed=5),
        }
        baseline_results_path.parent.mkdir(parents=True, exist_ok=True)
        with open(baseline_results_path, 'w') as f:
            json.dump(baseline_values, f, indent=2)
        return baseline_values

    print(f"loading: {results_filename}")
    with open(baseline_results_path, 'r') as f:
        cached_values = json.load(f)
    # JSON saves dictionary keys as strings, so we have to convert them back to ints
    return {
        baseline: {int(seed): val for seed, val in baseline_dict.items()}
        for baseline, baseline_dict in cached_values.items()
    }


def main():
    """
    Configure the CVRP environment, the message-passing policy model and the PPO
    agent, then run training.

    All settings are hard-coded below. When ``use_trains`` is True a trains task is
    created and the agent / environment configuration dictionaries are connected
    to it.
    """
    # Init environment
    use_trains = False
    problem_name = 'cvrp'
    problem_type = 'uniform_offline'
    max_customer_times = 0
    size = 20
    vehicle_velocity = 1
    vehicle_capacity = 30
    random_seed = 0
    max_demand = 10
    start_at_depot = True
    EVAL_BASELINES_RESULTS_FILENAME = (
        f"experiments/{problem_name}/{size}s_{vehicle_capacity}c_{max_customer_times}t/"
        f"baseline_values.json")

    env_config = {
        'problem_type': problem_type,
        'max_customer_times': max_customer_times,
        'size': size,
        'max_demand': max_demand,
        'vehicle_velocity': vehicle_velocity,
        'vehicle_capacity': vehicle_capacity,
        'start_at_depot': start_at_depot,
        'random_seed': random_seed,
        'eval_baseline_results_filename': EVAL_BASELINES_RESULTS_FILENAME
    }
    if use_trains:
        task = Task.init(
            project_name="train_cvrp_pytorch",
            task_name=f'train_ppo_agent_{size}s_{vehicle_capacity}c_{max_customer_times}t')
        logger = Task.current_task().get_logger()
        logger.tensorboard_single_series_per_graph(single_series=True)
    else:
        logger = None

    env = create_uniform_dynamic_problem(max_customer_times=max_customer_times,
                                         size=size,
                                         max_demand=max_demand,
                                         vehicle_velocity=vehicle_velocity,
                                         vehicle_capacity=vehicle_capacity,
                                         random_seed=random_seed,
                                         start_at_depot=start_at_depot)

    # Alternative setup (kept for reference): a small fixed static problem
    # customer_positions = [[0.25, 0.25], [0.5, 0.5], [1, 1]]
    # env = create_fixed_static_problem(customer_positions=customer_positions,
    #                                   depot_position=[0, 0],
    #                                   initial_vehicle_capacity=10,
    #                                   initial_vehicle_position=[0, 0],
    #                                   customer_demands=[1]*len(customer_positions),
    #                                   customer_times=[0]*len(customer_positions),
    #                                   vehicle_velocity=1)
    #
    # env_config = {'problem_type': 'fixed_problem',
    #               'size': 3,
    #               'vehicle_capacity': 10,
    #               'vehicle_position': [0, 0],
    #               'customer_positions': customer_positions,
    #               'start_at_depot': True
    #               }
    # EVAL_BASELINES_RESULTS_FILENAME = (f'experiments/{3}s_{10}c_{0}t/'
    #                                    f'baseline_values.json')

    # The agent trains on the wrapped environment; the baselines below are
    # evaluated on the unwrapped `env`.
    tg_env = GeometricAttentionWrapper(env)
    tg_env.reset()

    # Alternative model configuration (kept for reference)
    # model_config = {
    #     'use_value_critic': True,
    #     'num_features': 4,
    #     'embedding_dim': 128,
    #     'value_embedding_dim': 128,
    #     'use_batch_norm': False
    # }
    model_config = {
        'n_passes': 4,
        'edge_embedding_dim': 64,
        'node_embedding_dim': 64,
        'global_embedding_dim': 64,
        'edge_hidden_dim': 64,
        'edge_target_dim': 64,
        'node_target_dim': 64,
        'node_dim_out': 1,
        'edge_dim_out': 1,
        'node_hidden_dim': 64,
        'global_hidden_dim': 64,
        'global_target_dim': 64,
        'global_dim_out': 64,
        'edge_feature_dim': 1,
        'node_feature_dim': 5,  # indicator, x, y, demand/capacity, is_visited
        'global_feature_dim': 1,
        'value_embedding_dim': 64,
        'use_value_critic': True,
        'use_batch_norm': False
    }

    agent_config = {
        'lr': 0.0003,
        'discount': 0.99,
        # number of episodes to do altogether
        'number_of_episodes': 50000000,
        # a batch is N episodes where N is number_of_episodes_in_batch
        # (this must be a division of number of episodes)
        'number_of_episodes_in_batch': 20,
        'total_num_eval_seeds': 100,
        'num_eval_seeds': 10,
        'evaluate_every': 50,
        'num_train_seeds': 1000,
        'reward_average_window_size': 10,
        'entropy_coeff': 0.001,  # consider decreasing this back
        'value_coeff': 0.1,
        'model_config': model_config,
        'save_checkpoint_every': 1000,
        'eps_clip': 0.5,
        'n_ppo_updates': 80,
        'target_kl': 0.0001,
        'logit_normalizer': 5,
        'problem_name': problem_name  # used for saving results
    }
    # The model needs the same logit normalizer as the agent
    model_config['logit_normalizer'] = agent_config['logit_normalizer']
    agent_config['run_name'] = f"ep_in_batch_{agent_config['number_of_episodes_in_batch']}_" \
                               f"n_eval_{agent_config['num_eval_seeds']}_lr_{agent_config['lr']}"
    eval_seeds = list(range(agent_config['total_num_eval_seeds']))
    baseline_values = _load_or_compute_baseline_values(
        env, eval_seeds, EVAL_BASELINES_RESULTS_FILENAME)

    # model = PolicyFullyConnectedGAT(cfg=model_config, model_name='ppo_policy_model')
    model = PolicyFullyConnectedMessagePassing(
        cfg=model_config, model_name='ppo_message_passing_model')
    set_seeds()
    if use_trains:
        # Connect both configuration dicts to the task; the return values were
        # never used, so they are no longer bound to local names.
        task.connect(agent_config, name='agent_config')
        task.connect(env_config, name='env_config')
    agent_config['env_config'] = env_config
    ppo_agent = PPOAgent(tg_env,
                         config=agent_config,
                         model=model,
                         eval_seeds=eval_seeds,
                         baseline_eval_values=baseline_values)
    ppo_agent.train()
Esempio n. 23
0
# TRAINS - Example of manual graphs and statistics reporting
#
import numpy as np
import logging
from trains import Task

task = Task.init(project_name='examples', task_name='Manual reporting')

# example python logger
logging.getLogger().setLevel('DEBUG')
logging.debug('This is a debug message')
logging.info('This is an info message')
logging.warning('This is a warning message')
logging.error('This is an error message')
logging.critical('This is a critical message')

# get TRAINS logger object for any metrics / reports
logger = task.get_logger()

# log text
logger.console("hello")

# report scalar values
logger.report_scalar("example_scalar", "series A", iteration=0, value=100)
logger.report_scalar("example_scalar", "series A", iteration=1, value=200)

# report histogram
histogram = np.random.randint(10, size=10)
logger.report_vector("example_histogram",
                     "random histogram",
                     iteration=1,
Esempio n. 24
0
else:
    part_index = 0
    part_num = 1
    gpu_num = 1

# Tensorboard Logging
# Only the root node prints the GPU count, sets up experiment tracking and
# creates the tensorboard log directory; on every other node `tb_logdir` and
# `OPTS.trains_task` stay None.
tb_logdir = None
OPTS.trains_task = None
if is_root_node():
    print("Running on {} GPUs".format(gpu_num))
    if OPTS.tensorboard:
        try:
            from trains import Task
            task = Task.init(project_name="EBM_LM",
                             task_name=OPTS.result_tag,
                             auto_connect_arg_parser=False,
                             output_uri="{}/data/model_backups".format(
                                 os.getenv("HOME")))
            task.connect(ap)
            task.set_random_seed(OPTS.seed)
            OPTS.trains_task = task
        except SystemError as e:
            # NOTE(review): only SystemError is tolerated here; any other
            # failure (e.g. a missing `trains` package) still aborts the run --
            # confirm that this is intended.
            print(e)
        tb_logdir = os.path.join(OPTS.root, "tensorboard")
        # makedirs(exist_ok=True) replaces the exists()/mkdir() pair: no race
        # between the check and the creation, and a missing parent directory
        # is created as well.
        os.makedirs(tb_logdir, exist_ok=True)

# Get the path variables
(train_src_corpus, train_tgt_corpus, distilled_tgt_corpus, truncate_datapoints,
 test_src_corpus, test_tgt_corpus, ref_path, src_vocab_path, tgt_vocab_path,
Esempio n. 25
0
import sys
from argparse import ArgumentParser

from absl import app
from absl import flags
from absl import logging

from trains import Task


# Global absl flags container
FLAGS = flags.FLAGS

# absl flags defined BEFORE the task is initialized
flags.DEFINE_string('echo', None, 'Text to echo.')
flags.DEFINE_string('another_str', 'My string', 'A string', module_name='test')

task = Task.init(project_name='examples', task_name='hyper-parameters example')

# absl flags defined AFTER the task is initialized
# NOTE(review): presumably both groups are meant to be picked up as task
# hyper-parameters, which is why flags are declared on both sides of Task.init -- confirm.
flags.DEFINE_integer('echo3', 3, 'Text to echo.')
flags.DEFINE_string('echo5', '5', 'Text to echo.', module_name='test')


# Plain dictionary of mixed-type values (list, dict, tuple, int, float, str)
parameters = {
    'list': [1, 2, 3],
    'dict': {'a': 1, 'b': 2},
    'tuple': (1, 2, 3),
    'int': 3,
    'float': 2.2,
    'string': 'my string',
}
# Connect the dictionary to the task and keep using the object returned by connect()
parameters = task.connect(parameters)
from argparse import ArgumentParser
from pathlib2 import Path

from utilities import get_iou_types, draw_boxes, get_model_instance_segmentation, CocoLikeAnnotations, get_backbone
from torchvision_references import utils
from torchvision.transforms import functional as F

from PIL import Image
from transforms import get_transform

from SSD.ssd_model import SSD
from SSD.multibox_loss import SSDLoss

from trains import Task
# Initialize the TRAINS task for this inference script at import time
task = Task.init(
    project_name='Object Detection with TRAINS, Ignite and TensorBoard',
    task_name='Inference with trained SSD model')


def rescale_box(box, image_size, orig_height, orig_width):
    """
    Rescale a box in place and return it.

    The first two entries of ``box`` are multiplied by ``orig_width / image_size``
    and all remaining entries by ``orig_height / image_size``. ``box`` must support
    in-place multiplication of slices by a float (e.g. a float array or tensor).

    NOTE(review): if ``box`` is laid out as (x1, y1, x2, y2) this scales y1 by the
    width ratio and x2 by the height ratio -- confirm the expected layout with the caller.

    :param box: box coordinates, modified in place
    :param image_size: side length the box coordinates currently refer to
    :param orig_height: height of the original image
    :param orig_width: width of the original image
    :return: the same ``box`` object, rescaled
    """
    leading_pair, remainder = slice(None, 2), slice(2, None)
    height_ratio = float(orig_height) / image_size
    width_ratio = float(orig_width) / image_size
    for span, ratio in ((leading_pair, width_ratio), (remainder, height_ratio)):
        box[span] *= ratio
    return box


def run(task_args):
    writer = SummaryWriter(log_dir=task_args.log_dir)
    input_checkpoint = torch.load(task_args.input_checkpoint)
    labels_enum = input_checkpoint.get('labels_enumeration')
Esempio n. 27
0
from __future__ import absolute_import, division, print_function

import argparse
import os
import sys
import time
from tempfile import gettempdir

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

from trains import Task

# Switch TensorFlow to eager execution before anything else is built
tf.compat.v1.enable_eager_execution()

task = Task.init(project_name='examples', task_name='Tensorflow eager mode')

# Define two tf.app flags (one integer, one string) after the task was initialized
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_integer('data_num', 100, """Flag of type integer""")
tf.app.flags.DEFINE_string('img_path', './img', """Flag of type string""")

# Shorthand for the Keras layers namespace
layers = tf.keras.layers
# NOTE: this rebinds FLAGS to None, discarding the tf.app.flags.FLAGS reference
# assigned above; presumably FLAGS is populated again later in the script -- confirm.
FLAGS = None


class Discriminator(tf.keras.Model):
    """
    GAN Discriminator.
    A network to differentiate between generated and real handwritten digits.
    """
    def __init__(self, data_format):
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from PIL import Image
import matplotlib.pyplot as plt

import torchvision.transforms as transforms
import torchvision.models as models

import copy
from trains import Task


# Register this example as a TRAINS task of type `testing`
task = Task.init(project_name='examples', task_name='pytorch with matplotlib example', task_type=Task.TaskTypes.testing)


######################################################################
# Next, we need to choose which device to run the network on and import the
# content and style images. Running the neural transfer algorithm on large
# images takes longer and will go much faster when running on a GPU. We can
# use ``torch.cuda.is_available()`` to detect if there is a GPU available.
# Next, we set the ``torch.device`` for use throughout the tutorial. Also the ``.to(device)``
# method is used to move tensors or modules to a desired device.

# Use CUDA when it is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

######################################################################
# Loading the Images
# ------------------
Esempio n. 29
0
    def _patched_parse_args(original_parse_fn,
                            self,
                            args=None,
                            namespace=None):
        """
        Parse arguments and synchronize the parser with the current task.

        When a current task is registered on ``PatchArgumentParser``, the parser is
        connected to it through ``_connect_argparse``: when not running remotely the
        arguments are parsed before the sync, when running remotely they are parsed
        after it. Without a current task the call is forwarded to
        ``original_parse_fn``. In every case the parser and the parse result are
        recorded on ``PatchArgumentParser``.

        :param original_parse_fn: the wrapped parse function, called as
            ``original_parse_fn(self, args=args, namespace=namespace)``; may be falsy
        :param self: the argument parser instance being used
        :param args: argument list forwarded to ``original_parse_fn``
        :param namespace: namespace forwarded to ``original_parse_fn``
        :return: the most recent entry of ``PatchArgumentParser._last_parsed_args``
        """
        # if we are running remotely, we always have a task id, so we better patch the argparser as soon as possible.
        if not PatchArgumentParser._current_task:
            from ..config import running_remotely
            if running_remotely():
                # this will cause the current_task() to set PatchArgumentParser._current_task
                from trains import Task
                # noinspection PyBroadException
                try:
                    Task.init()
                except Exception:
                    pass
        # automatically connect to current task:
        if PatchArgumentParser._current_task:
            from ..config import running_remotely

            # Re-entrant call (the flag is set while an outer call is in progress):
            # just parse and return, do not sync with the task again.
            if PatchArgumentParser._calling_current_task:
                # if we are here and running remotely by now we should try to parse the arguments
                if original_parse_fn:
                    PatchArgumentParser._add_last_parsed_args(
                        original_parse_fn(self, args=args,
                                          namespace=namespace))
                return PatchArgumentParser._last_parsed_args[-1]

            PatchArgumentParser._calling_current_task = True
            # The flag is cleared in `finally` so that an exception raised while
            # parsing cannot leave the re-entrancy guard permanently set.
            try:
                # Store last instance and result
                PatchArgumentParser._add_last_arg_parser(self)
                parsed_args = None
                # parse if we are running in dev mode
                if not running_remotely() and original_parse_fn:
                    parsed_args = original_parse_fn(self,
                                                    args=args,
                                                    namespace=namespace)
                    PatchArgumentParser._add_last_parsed_args(parsed_args)

                # noinspection PyBroadException
                try:
                    # sync to/from task
                    # noinspection PyProtectedMember
                    PatchArgumentParser._current_task._connect_argparse(
                        self,
                        args=args,
                        namespace=namespace,
                        # a tuple result is reduced to its first element
                        parsed_args=parsed_args[0] if isinstance(
                            parsed_args, tuple) else parsed_args)
                except Exception:
                    pass

                # sync back and parse
                if running_remotely() and original_parse_fn:
                    # if we are running python2 check if we have subparsers,
                    # if we do we need to patch the args, because there is no default subparser
                    if PY2:
                        import itertools

                        def _get_sub_parsers_defaults(subparser, prev=None):
                            # Collect (recursively) the sub-parser actions that carry a default.
                            # `prev` is only passed through; its default is None instead of a
                            # shared mutable list.
                            actions_grp = [
                                a._actions for a in subparser.choices.values()
                            ] if isinstance(subparser, _SubParsersAction) else [
                                subparser._actions
                            ]
                            sub_parsers_defaults = [[
                                subparser
                            ]] if hasattr(subparser,
                                          'default') and subparser.default else []
                            for actions in actions_grp:
                                sub_parsers_defaults += [
                                    _get_sub_parsers_defaults(a, prev)
                                    for a in actions
                                    if isinstance(a, _SubParsersAction)
                                    and hasattr(a, 'default') and a.default
                                ]

                            return list(
                                itertools.chain.from_iterable(
                                    sub_parsers_defaults))

                        sub_parsers_defaults = _get_sub_parsers_defaults(self)
                        if sub_parsers_defaults:
                            if args is None:
                                # args default to the system args
                                import sys as _sys
                                args = _sys.argv[1:]
                            else:
                                args = list(args)
                            # make sure we append the subparsers
                            for a in sub_parsers_defaults:
                                if a.default not in args:
                                    args.append(a.default)

                    PatchArgumentParser._add_last_parsed_args(
                        original_parse_fn(self, args=args, namespace=namespace))
                else:
                    PatchArgumentParser._add_last_parsed_args(parsed_args or {})
            finally:
                PatchArgumentParser._calling_current_task = False

            return PatchArgumentParser._last_parsed_args[-1]

        # No current task: plain parse.
        # Store last instance and result
        PatchArgumentParser._add_last_arg_parser(self)
        PatchArgumentParser._add_last_parsed_args(
            {} if not original_parse_fn else original_parse_fn(
                self, args=args, namespace=namespace))
        return PatchArgumentParser._last_parsed_args[-1]
Esempio n. 30
0
# TRAINS - Example of manual model configuration and uploading
#
import os
from tempfile import gettempdir

import torch
from trains import Task

# Initialize the TRAINS task for this example
task = Task.init(project_name='examples',
                 task_name='Model configuration and upload')

# create a model
# NOTE(review): this binds the torch.nn.Module class itself, not an instance --
# presumably a placeholder for a real model; confirm.
model = torch.nn.Module

# Connect a local configuration file
config_file = os.path.join('..', '..', 'reporting', 'data_samples',
                           'sample.json')
config_file = task.connect_configuration(config_file)
# then read configuration as usual, the backend will contain a copy of it.
# later when executing remotely, the returned `config_file` will be a temporary file
# containing a new copy of the configuration retrieved from the backend
# # model_config_dict = json.load(open(config_file, 'rt'))

# Or Store dictionary of definition for a specific network design
model_config_dict = {
    'value': 13.37,
    'dict': {
        'sub_value': 'string',
        'sub_integer': 11
    },
    'list_of_ints': [1, 2, 3, 4],