예제 #1
0
 def wrapper(*args, **kwargs):
     """Call the wrapped function only when its config session is usable.

     ``config``, ``conf_sess_name``, ``f`` and ``logger`` are taken from the
     enclosing scope. When ``config`` is falsy, the session is missing, or
     the session value is falsy, nothing is called and None is returned.

     Returns:
         Whatever ``f(*args, **kwargs)`` returns, or None when the call is
         skipped.
     """
     # Only the lookup is guarded: a KeyError raised by ``f`` itself must
     # not be reported as a missing config session.
     try:
         session = config[conf_sess_name] if config else None
     except KeyError:
         logger.warning('No such config session in myeconfig')
         return None
     if not session:
         return None
     return f(*args, **kwargs)
예제 #2
0
    def run(self, resources):
        """Run the dataset conversion subprocess and record its outcome.

        When ``self.skip`` is set no subprocess is started: the metadata is
        marked as done, published through ``add_tag`` and the status becomes
        DONE. Otherwise the command returned by ``self._task_arguments()`` is
        spawned with stderr merged into stdout. Each output line is written
        to ``self.output_log`` and passed to ``self.process_output``; lines
        it rejects are collected and logged at debug level on failure.

        Args:
            resources: Not used by this method.

        Raises:
            Exception: Re-raised unchanged when reading or processing the
                output fails. The subprocess is terminated and the status is
                set to ERROR first; ``self.after_run()`` is always called.
        """
        if self.skip:
            self.metadata.update({'status': 'Done'})
            self.metadata.update({'data': 0})
            add_tag(self.dataset_id, {
                self.reader_type: self.metadata,
            })
            self.status = DONE
            return

        args = self._task_arguments()
        self.p = subprocess.Popen(
            args,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            close_fds=True,
        )

        unknown_output = []
        self.before_run()
        try:
            while self.p.poll() is None:
                for line in utils.nonblocking_readlines(self.p.stdout):
                    if self.aborted.is_set():
                        self.p.send_signal(signal.SIGKILL)
                    # The reader may yield None (presumably when no data is
                    # available yet), so guard before writing.
                    if line is not None:
                        self.output_log.write(line)
                        line = line.strip()
                    if line:
                        if not self.process_output(line):
                            unknown_output.append(line)
                    else:
                        # Back off only when there was nothing to process;
                        # recognised lines must not be throttled.
                        time.sleep(0.05)
                    time.sleep(0.01)  # do not remove this line.
        except Exception as e:
            self.p.terminate()
            self.status = ERROR
            logger.warning('Convert dataset {} to {} fail. {}'.format(
                self.dataset_id, self.reader_type, str(e)))
            # format_exc() takes no exception argument; its first parameter
            # is the traceback depth limit.
            logger.debug(traceback.format_exc())
            logger.debug(unknown_output)
            raise
        finally:
            self.after_run()

        if self.p.returncode != 0:
            self.status = ERROR
            logger.warning(
                'Convert dataset {} to {} fail. return code {}, {}'.format(
                    self.dataset_id, self.reader_type, self.p.returncode,
                    self.error_message))
            logger.debug(unknown_output)
        else:
            self.status = DONE
예제 #3
0
def inference(id, image):
    """Send ``image`` to a service instance and return its decoded reply.

    The instance is looked up with ``scheduler.get_instance(id)`` and
    contacted through a ZeroMQ REQ socket at
    ``ipc://<svc._dir>/unix.socket``.

    Args:
        id: Identifier passed to ``scheduler.get_instance``.
        image: Payload passed to ``socket.send``.

    Returns:
        The reply parsed with ``json.loads``, or ``[]`` when connecting,
        sending or receiving raises.
    """
    svc = scheduler.get_instance(id)
    context = None
    socket = None
    # Connect to server by zmq
    try:
        context = zmq.Context()
        socket = context.socket(zmq.REQ)
        address = "ipc://{}/unix.socket".format(svc._dir)
        logger.debug("Connecting to {}".format(address))
        socket.connect(address)
        socket.send(image)
        # waiting for reply objects
        message = socket.recv()
    except Exception as e:
        logger.warning('Inference fail: {}'.format(e))
        return []
    finally:
        # Release the socket and context on every path. linger=0 drops any
        # unsent request so that terminating the context cannot block.
        if socket is not None:
            socket.close(linger=0)
        if context is not None:
            context.term()
    logger.debug("Reply: {}".format(message))
    return json.loads(message)
예제 #4
0
 def save(self):
     """Write ``self._dict`` as JSON to the ``SAVE_INFO`` file.

     The data is first written to a ``.tmp`` sibling and then moved over the
     final path, so an interrupted write does not leave a truncated file at
     the final location. Errors are logged rather than raised.

     Returns:
         bool: True when the file was written; False when ``self._dir`` does
         not exist, the save was interrupted, or an error occurred.
     """
     try:
         if not os.path.exists(self._dir):
             return False
         # Serialise before touching the disk so a failure here does not
         # leave an empty temp file behind.
         data = json.dumps(self._dict, sort_keys=True, indent=4)
         tmpfile_path = self.path(self.SAVE_INFO + '.tmp')
         # Text mode: json.dumps returns str, which a binary-mode file
         # rejects on Python 3.
         with open(tmpfile_path, 'w') as tmpfile:
             tmpfile.write(data)
         file_path = self.path(self.SAVE_INFO)
         shutil.move(tmpfile_path, file_path)
         return True
     except KeyboardInterrupt:
         pass
     except Exception as e:
         logger.warning('Caught %s while saving run %s: %s' %
                        (type(e).__name__, self.id, e))
         # format_exc() takes no exception argument; its first parameter is
         # the traceback depth limit.
         logger.debug(traceback.format_exc())
     return False
예제 #5
0
    def run(self, resources):
        """Pull ``self.image_tag`` through the Docker API and track progress.

        Logs in to the registry returned by ``get_registry(host)`` inside the
        application context, then streams the pull and stores the
        ``current / total`` fraction of each progress record in
        ``self.progress``. Failures are logged as warnings and not raised.

        Args:
            resources: Not used by this method.
        """
        self.before_run()

        host, _repo, _image = self.parse_tag(self.image_tag)
        logger.info("Pulling {}... ".format(self.image_tag))
        try:
            cli = docker.APIClient(base_url='unix://var/run/docker.sock')
            with app.app_context():
                registry = get_registry(host)
                registry.login(cli)

            for line in cli.pull(self.image_tag, stream=True, decode=True):
                if "progressDetail" not in line:
                    continue
                progress = line["progressDetail"] or {}
                current = progress.get('current')
                total = progress.get('total')
                # Skip records without a usable total: a missing or zero
                # value would otherwise raise and abort the whole pull.
                if current is not None and total:
                    self.progress = float(current) / float(total)
        except Exception as e:
            logger.warning("failed to pull image, {}".format(e))
        else:
            logger.info("done.")
        self.after_run()
        # NOTE(review): the status is DONE even when the pull failed; this
        # looks like deliberate best-effort behaviour - confirm with callers.
        self.status = DONE
예제 #6
0
def new(username, name, image_tag, dataset_path, user_args,
        num_gpu, project, repo_path, parameters, parent):
    """Create an ``Instance`` with a fresh, unused run id.

    The id is ``RUNS_PREFIX + JOBS_PREFIX`` followed by eight hex characters
    of a random UUID, regenerated until no directory of that name exists
    under ``RUNS_DIR``. A leading ``bk/`` in ``dataset_path`` and in each
    whitespace-separated token of ``user_args`` is rewritten to
    ``/data/dataset/``.

    Args:
        username, name, image_tag, num_gpu, project, repo_path, parameters,
            parent: Passed through unchanged to ``Instance``.
        dataset_path: Dataset location; when truthy it must exist after the
            ``bk/`` prefix has been rewritten.
        user_args: Argument string, split on whitespace.

    Returns:
        The newly constructed ``Instance``.

    Raises:
        ValueError: If ``dataset_path`` is given but does not exist.
        Exception: Any other error raised while building the instance is
            logged and re-raised unchanged.
    """
    while True:
        job_id = RUNS_PREFIX + JOBS_PREFIX + uuid.uuid4().hex[:8]
        if not os.path.exists(os.path.join(RUNS_DIR, job_id)):
            break
    try:
        # count=1 rewrites only the leading "bk/"; later occurrences inside
        # the same path must stay untouched.
        args = [
            arg.replace('bk/', '/data/dataset/', 1)
            if arg.startswith("bk/") else arg
            for arg in user_args.split()
        ]
        if dataset_path:
            if dataset_path.startswith("bk/"):
                dataset_path = dataset_path.replace(
                    'bk/', '/data/dataset/', 1)
            if not os.path.exists(dataset_path):
                raise ValueError("Cannot find dataset {}".format(dataset_path))
        inst = Instance(id=job_id,
                        username=username,
                        name=name,
                        image_tag=image_tag,
                        dataset_path=dataset_path,
                        user_args=args,
                        num_gpu=num_gpu,
                        project=project,
                        status_history=[],
                        repo_path=repo_path,
                        parameters=parameters,
                        parent=parent,
                        child=[])
        logger.debug("Create instance {}".format(inst.id))
    except Exception as e:
        logger.warning('Caught %s while creating instance %s: %s' % (type(e).__name__, job_id, e))
        # format_exc() takes no exception argument; its first parameter is
        # the traceback depth limit.
        logger.debug(traceback.format_exc())
        # Bare raise keeps the original traceback intact.
        raise
    return inst
예제 #7
0
    def run(self, resources):
        """Run the ``unix_server`` tool as a subprocess until it exits.

        The child is started in ``self._dir`` with stderr merged into stdout,
        an extended ``PYTHONPATH`` and ``CUDA_VISIBLE_DEVICES`` built from
        ``resources['gpus']``. Non-empty output lines are appended to
        ``self.output_log``. When ``self.aborted`` is set the child receives
        SIGTERM and the status becomes ABORT; SIGKILL follows if it is still
        running after ``sigterm_timeout`` seconds.

        Args:
            resources: Mapping whose ``'gpus'`` entry is iterated as pairs;
                the first item of each pair is exported as a device index.

        Returns:
            bool: False when the status is no longer RUN once the child has
            exited (for example after an abort). Otherwise True, with the
            status set to DONE or, for a non-zero return code (also stored
            in ``self.returncode``), ERROR.

        Raises:
            Exception: Re-raised unchanged after the child has been
                terminated when reading its output fails.
        """
        self.before_run()
        env = os.environ.copy()
        env['PYTHONPATH'] = os.pathsep.join(
            ['.', self._dir, env.get('PYTHONPATH', '')] + sys.path)
        gpus = [i for (i, _) in resources['gpus']]
        env['CUDA_VISIBLE_DEVICES'] = ','.join(str(g) for g in gpus)
        root = os.path.dirname(os.path.abspath(myelindl.__file__))
        args = [
            sys.executable,
            '-m',
            os.path.join(root, 'tools', 'unix_server'),
            '--checkpoint-path=%s' % self.checkpoint_path,
            '--job-dir=%s' % self._dir,
        ]
        logger.debug("run args: {}".format(args))
        self.p = subprocess.Popen(
            args,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            cwd=self._dir,
            close_fds=True,
            env=env,
        )
        try:
            sigterm_time = None  # When was the SIGTERM signal sent
            sigterm_timeout = 120  # When should the SIGKILL signal be sent
            while self.p.poll() is None:
                for line in utils.nonblocking_readlines(self.p.stdout):
                    if self.aborted.is_set():
                        if sigterm_time is None:
                            # Attempt graceful shutdown
                            self.p.send_signal(signal.SIGTERM)
                            sigterm_time = time.time()
                            self.status = ABORT
                        break
                    if line is not None:
                        # Remove whitespace
                        line = line.strip()

                    if line:
                        self.output_log.write('%s\n' % line)
                        self.output_log.flush()
                    else:
                        time.sleep(0.05)
                if sigterm_time is not None and (time.time() - sigterm_time >
                                                 sigterm_timeout):
                    self.p.send_signal(signal.SIGKILL)
                    logger.debug('Sent SIGKILL to task "%s"' % self.name)
                time.sleep(0.01)
        except Exception as e:
            logger.warning('service exception: {}'.format(e))
            self.p.terminate()
            # Bare raise keeps the original traceback intact.
            raise
        finally:
            # Runs on success, on failure and on interrupts alike, so the
            # before_run()/after_run() pair always stays balanced.
            self.after_run()

        if self.status != RUN:
            return False
        if self.p.returncode != 0:
            self.returncode = self.p.returncode
            self.status = ERROR
        else:
            self.status = DONE
        return True
예제 #8
0
def myconfig_value(keyword, def_value):
    """Look up ``keyword`` in the ``SENAME`` section of ``myconfig``.

    Args:
        keyword: Key to read from ``myconfig[SENAME]``.
        def_value: Value returned when the lookup raises ``KeyError``.

    Returns:
        The configured value, or ``def_value`` (after logging a warning)
        when the section or the key is missing.
    """
    try:
        section = myconfig[SENAME]
        value = section[keyword]
    except KeyError:
        logger.warning('Key Error: %s, use def_value', keyword)
        value = def_value
    return value