Example #1
def start(logger, full_id, fetch=True, env=None, volumes=None, cpus=None, memory=None, gpu_devices=None, offline=False):
    """
    Starts the job identified by full_id, with full logging.
    """

    owner, name, id = unpack_full_job_id(full_id)

    if isinstance(sys.stdout, GeneralLogger):
        # drop anything written to stdout so far, so it does not end up in the job's log
        sys.stdout.clear_buffer()

    job_backend = JobBackend(model_name=owner + '/' + name)

    if fetch:
        job_backend.fetch(id)

    job_backend.restart(id)
    job_backend.start(collect_system=False, offline=offline)
    job_backend.set_status('PREPARE', add_section=False)

    job = job_backend.get_job_model()

    if not cpus:
        cpus = job.get_cpu()

    if not memory:
        memory = job.get_memory()

    if not gpu_devices and job.get_gpu():
        # e.g. a job requesting 2 GPUs gets the first two device ids, [0, 1]
        gpu_devices = list(range(job.get_gpu()))

    start_command(logger, job_backend, env, volumes, cpus=cpus, memory=memory, gpu_devices=gpu_devices, offline=offline)
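
A minimal usage sketch, assuming a standard logging.Logger and a made-up full job id of the form owner/modelName/jobId; cpus, memory and gpu_devices are optional and fall back to the values stored in the job model:

import logging

logger = logging.getLogger('aetros')

# Hypothetical job id; the resource arguments override the job's own settings.
start(logger, 'alice/mnist-cnn/0db75a64acb74c27bd72c22e359de7a4',
      fetch=True, cpus=2, gpu_devices=[0])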
Example #2
def start(logger,
          full_id,
          fetch=True,
          env=None,
          volumes=None,
          gpu_devices=None):
    """
    Starts the training process for the job identified by full_id, with full logging.
    """

    owner, name, id = unpack_full_job_id(full_id)

    if isinstance(sys.stdout, GeneralLogger):
        # drop anything written to stdout so far, so it does not end up in the job's log
        sys.stdout.clear_buffer()

    job_backend = JobBackend(model_name=owner + '/' + name)

    if fetch:
        job_backend.fetch(id)

    job_backend.restart(id)
    job_backend.start()
    job_backend.set_status('PREPARE')
    job_backend.monitoring_thread.handle_max_time = False

    start_command(logger, job_backend, env, volumes, gpu_devices=gpu_devices)
Example #3
def start(logger,
          full_id,
          hyperparameter=None,
          dataset_id=None,
          server='local',
          insights=False):
    """
    Starts the training process for the given model or job, with full logging.
    :type full_id: str: model name (owner/modelName) or full job id
    """
    id = None

    if full_id.count('/') == 1:
        owner, name = full_id.split('/')
    elif full_id.count('/') >= 2:
        owner, name, id = unpack_full_job_id(full_id)
    else:
        logger.error(
            "Invalid id %s given. Supported formats: owner/modelName or owner/modelName/jobId."
            % (full_id, ))
        sys.exit(1)

    job_backend = JobBackend(model_name=owner + '/' + name)
    if id:
        job_backend.restart(id)
    else:
        try:
            create_info = api.create_job_info(full_id, hyperparameter,
                                              dataset_id)
        except api.ApiError as e:
            if 'Connection refused' in e.reason:
                logger.error("You are offline")
            logger.error(
                "Can not start new job without knowing what model type it is. "
                "Use your script directly if its a Python model.")
            raise

        if not create_info:
            raise Exception(
                'Could not fetch model information. Are you online and have access to the given model?'
            )

        job_backend.create(create_info=create_info,
                           hyperparameter=hyperparameter,
                           server=server,
                           insights=insights)

    if not job_backend.get_job_model().config:
        raise Exception(
            'Job does not have a configuration. Make sure you created the job via AETROS Trainer.'
        )

    if job_backend.is_simple_model():
        start_keras(logger, job_backend)
    else:
        start_custom(logger, job_backend)
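
For illustration, the two id formats this function accepts (the model and job names below are hypothetical):

# owner/modelName          -> one slash: a new job is created via api.create_job_info
# owner/modelName/jobId    -> two or more slashes: the existing job is restarted
start(logger, 'alice/mnist-cnn')
start(logger, 'alice/mnist-cnn/0db75a64acb74c27bd72c22e359de7a4')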
Example #4
def predict(logger, job_id, file_paths, weights_path=None):

    owner, name, id = unpack_full_job_id(job_id)

    job_backend = JobBackend(model_name=owner + '/' + name)
    job_backend.load(id)

    job_model = job_backend.get_job_model()
    os.chdir(job_backend.git.work_tree)

    if not weights_path:
        weights_path = job_model.get_weights_filepath_latest()

    from .Trainer import Trainer

    trainer = Trainer(job_backend)
    job_model.set_input_shape(trainer)

    import keras.backend
    if hasattr(keras.backend, 'set_image_dim_ordering'):
        keras.backend.set_image_dim_ordering('tf')

    if hasattr(keras.backend, 'set_image_data_format'):
        keras.backend.set_image_data_format('channels_last')

    job_backend.logger.info("Load model and compile ...")

    model = job_model.get_built_model(trainer)
    trainer.model = model

    from aetros.keras import load_weights
    logger.info('Load weights from ' + weights_path)
    load_weights(model, weights_path)

    inputs = []
    for idx, file_path in enumerate(file_paths):
        inputs.append(
            job_model.convert_file_to_input_node(
                file_path, job_model.get_input_node(idx)))

    job_backend.logger.info("Start prediction ...")

    prediction = job_model.predict(trainer, np.array(inputs))

    print(json.dumps(prediction, indent=4, default=invalid_json_values))
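
A hedged usage sketch; the job id and input file below are made up, and one file path is expected per input node of the model. When weights_path is omitted, the job's latest checkpoint is used:

import logging

logger = logging.getLogger('aetros')

# Hypothetical job id and input file; the prediction is printed as JSON.
predict(logger, 'alice/mnist-cnn/0db75a64acb74c27bd72c22e359de7a4',
        file_paths=['digit.png'])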
Example #5
def predict(logger, job_id, file_paths, weights_path=None):

    owner, name, id = unpack_full_job_id(job_id)

    job_backend = JobBackend(model_name=owner + '/' + name)
    job_backend.fetch(id)
    job_backend.load(id)

    job_model = job_backend.get_job_model()
    os.chdir(job_backend.git.work_tree)

    if not weights_path:
        weights_path = job_model.get_weights_filepath_latest()

    from .Trainer import Trainer

    trainer = Trainer(job_backend)
    job_model.set_input_shape(trainer)

    import keras.backend
    if hasattr(keras.backend, 'set_image_dim_ordering'):
        keras.backend.set_image_dim_ordering('tf')

    if hasattr(keras.backend, 'set_image_data_format'):
        keras.backend.set_image_data_format('channels_last')

    job_backend.logger.info("Load model and compile ...")

    model = job_model.get_built_model(trainer)
    trainer.model = model

    from aetros.keras import load_weights
    logger.info('Load weights from ' + weights_path)
    load_weights(model, weights_path)

    inputs = []
    for idx, file_path in enumerate(file_paths):
        inputs.append(job_model.convert_file_to_input_node(file_path, job_model.get_input_node(idx)))

    job_backend.logger.info("Start prediction ...")

    prediction = job_model.predict(trainer, np.array(inputs))

    print(simplejson.dumps(prediction, indent=4, default=invalid_json_values))
Example #6
    def main(self, args):
        import aetros.const

        parser = argparse.ArgumentParser(
            formatter_class=argparse.RawTextHelpFormatter,
            prog=aetros.const.__prog__ + ' run',
            description="Internal usage.")

        parser.add_argument(
            'id',
            nargs='?',
            help=
            'Job id, e.g. user/modelname/0db75a64acb74c27bd72c22e359de7a4c44a20e5 to start a pre-created job.'
        )
        parser.add_argument('--fetch',
                            action='store_true',
                            help="Fetch job from server.")

        parsed_args = parser.parse_args(args)

        if not parsed_args.id:
            parser.print_help()
            sys.exit(1)

        owner, name, id = unpack_full_job_id(parsed_args.id)

        job_backend = JobBackend(model_name=owner + '/' + name)
        job_backend.section('checkout')

        if parsed_args.fetch:
            job_backend.fetch(id)

        job_backend.load(id)
        job_backend.start()

        start_keras(self.logger, job_backend)
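
A sketch of driving this method directly; the stand-in object and the job id are assumptions, since only the method body is shown above and main() needs nothing from its class beyond self.logger:

import logging
from types import SimpleNamespace

# Hypothetical stand-in providing the self.logger the method expects;
# assumes main() is reachable as a plain function.
cmd = SimpleNamespace(logger=logging.getLogger('aetros'))
main(cmd, ['alice/mnist-cnn/0db75a64acb74c27bd72c22e359de7a4', '--fetch'])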