Example #1
def _req(route_name, params, op, raw_response):
    """Make request and parse result

    Args:
        route_name (str): string name of route
        params (dict): parameters to apply to route
        op (func): how to request
        raw_response (bool): if True, return the response unparsed

    Returns:
        object: parsed JSON result
    """
    from sirepo import simulation_db

    uri = None
    resp = None
    try:
        uri = _uri(route_name, params)
        resp = op(uri)
        if raw_response:
            return resp
        return simulation_db.json_load(resp.data)
    except Exception as e:
        pkdlog('{}: uri={} resp={}', e, uri, resp)
        pkdlog(pkdexc())
        raise
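
A note on the pattern: pkdexc() formats the active exception's traceback as a string, so it is only meaningful inside an except block. A minimal, self-contained sketch of the same log-and-reraise idiom, assuming only that pykern is installed (parse_int is an invented example):

from pykern.pkdebug import pkdexc, pkdlog

def parse_int(value):
    try:
        return int(value)
    except Exception as e:
        # pkdexc() returns the current traceback as a string; log it with
        # context, then re-raise so callers still see the failure
        pkdlog('value={} error={} stack={}', value, e, pkdexc())
        raise
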
Example #2
    async def _fastcgi_read(self, connection):
        s = None
        m = None
        try:
            s = tornado.iostream.IOStream(
                connection,
                max_buffer_size=job.cfg.max_message_bytes,
            )
            while True:
                m = await self._fastcgi_msg_q.get()
                # Avoid issues with exceptions. We don't use q.join()
                # so not an issue to call before work is done.
                self._fastcgi_msg_q.task_done()
                await s.write(pkjson.dump_bytes(m) + b'\n')
                await self.job_cmd_reply(
                    m,
                    job.OP_ANALYSIS,
                    await s.read_until(b'\n', job.cfg.max_message_bytes),
                )
        except Exception as e:
            pkdlog('msg={} error={} stack={}', m, e, pkdexc())
            # If self.fastcgi_cmd is None we initiated the kill so not an error
            if not self.fastcgi_cmd:
                return
            await self._fastcgi_handle_error(m, e, pkdexc())
        finally:
            if s:
                s.close()
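
Calling task_done() immediately after get() is deliberate: if the write or read below raises, the queue's unfinished-task counter still balances. A rough asyncio sketch of the same newline-delimited request/reply loop, with invented names and no tornado dependency:

import asyncio
import json

async def rpc_loop(q, reader, writer):
    # q is an asyncio.Queue of JSON-serializable messages; reader and
    # writer come from, e.g., asyncio.open_connection()
    while True:
        m = await q.get()
        q.task_done()  # ack now so a failure below can't unbalance q.join()
        writer.write(json.dumps(m).encode() + b'\n')
        await writer.drain()
        print(json.loads(await reader.readline()))
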
Example #3
def default_command(in_file):
    """Reads `in_file` passes to `msg.jobCmd`

    Must be called in run_dir

    Writes its output on stdout.

    Args:
        in_file (str): json parsed to msg
    Returns:
        str: json output of command, e.g. status msg
    """
    try:
        job.init()
        f = pkio.py_path(in_file)
        msg = pkjson.load_any(f)
        #TODO(e-carlin): find common place to serialize/deserialize paths
        msg.runDir = pkio.py_path(msg.runDir)
        f.remove()
        res = globals()['_do_' + msg.jobCmd](
            msg,
            sirepo.template.import_module(msg.simulationType),
        )
        if res is None:
            return
        r = PKDict(res).pksetdefault(state=job.COMPLETED)
    except Exception as e:
        r = PKDict(
            state=job.ERROR,
            error=e.sr_args.error if isinstance(e, sirepo.util.UserAlert) else str(e),
            stack=pkdexc(),
        )
    return pkjson.dump_pretty(r, pretty=False)
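
The dispatch-and-wrap shape recurs throughout these examples: look the handler up by name in globals(), and fold any exception into a structured result instead of letting it escape. A reduced sketch (PKDict, pkjson, and pkdexc are real pykern APIs; the command and its handler are invented):

from pykern import pkjson
from pykern.pkcollections import PKDict
from pykern.pkdebug import pkdexc

def _do_echo(msg):
    return PKDict(state='completed', echo=msg.text)

def run_command(msg):
    # Dispatch by name; any failure becomes a structured error result
    try:
        r = globals()['_do_' + msg.cmd](msg)
    except Exception as e:
        r = PKDict(state='error', error=str(e), stack=pkdexc())
    return pkjson.dump_pretty(r, pretty=False)

print(run_command(PKDict(cmd='echo', text='hi')))
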
Example #4
def _on_do_compute_exit(success_exit, is_parallel, template, run_dir):
    # locals() must be called before anything else so we only get the function
    # arguments
    kwargs = locals()

    def _failure_exit():
        a = _post_processing()
        if not a:
            f = run_dir.join(template_common.RUN_LOG)
            if f.exists():
                a = _parse_python_errors(pkio.read_text(f))
        if not a:
            a = 'non-zero exit code'
        return PKDict(state=job.ERROR, error=a)

    def _post_processing():
        if hasattr(template, 'post_execution_processing'):
            return template.post_execution_processing(**kwargs)
        return None

    def _success_exit():
        return PKDict(
            state=job.COMPLETED,
            alert=_post_processing(),
        )

    try:
        return _success_exit() if success_exit else _failure_exit()
    except Exception as e:
        return PKDict(state=sirepo.job.ERROR, error=e, stack=pkdexc())
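
The locals() call on the first line is a small but load-bearing trick: invoked before any other local variable is bound, it snapshots exactly the function's arguments so they can be forwarded wholesale to post_execution_processing. An illustration with invented names:

def _on_exit(success_exit, is_parallel, run_dir):
    # Must run before any other assignment, or the snapshot would pick
    # up extra locals
    kwargs = locals()
    assert sorted(kwargs) == ['is_parallel', 'run_dir', 'success_exit']
    return kwargs

print(_on_exit(True, False, '/tmp/run'))
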
Example #5
def api_runCancel():
    try:
        return _request()
    except Exception as e:
        pkdlog('ignoring exception={} stack={}', e, pkdexc())
    # Always true from the client's perspective
    return sirepo.http_reply.gen_json({'state': 'canceled'})
Example #6
def fixup_old_data(data):
    """Upgrade data to latest schema and updates version.

    Args:
        data (dict): to be updated (destructively)

    Returns:
        dict: upgraded `data`
        bool: True if data changed
    """
    try:
        if 'version' in data and data['version'] == SCHEMA_COMMON['version']:
            return data, False
        data['version'] = SCHEMA_COMMON['version']
        if 'simulationType' not in data:
            if 'sourceIntensityReport' in data['models']:
                data['simulationType'] = 'srw'
            elif 'fieldAnimation' in data['models']:
                data['simulationType'] = 'warp'
            elif 'bunchSource' in data['models']:
                data['simulationType'] = 'elegant'
            else:
                pkdlog('simulationType: not found; data={}', data)
                raise AssertionError('must have simulationType')
        if 'simulationSerial' not in data['models']['simulation']:
            data['models']['simulation']['simulationSerial'] = 0
        sirepo.template.import_module(data['simulationType']).fixup_old_data(data)
        try:
            del data['models']['simulationStatus']
        except KeyError:
            pass
        return data, True
    except Exception as e:
        pkdlog('{}: error: {}', data, pkdexc())
        raise
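
The contract is worth spelling out: the fixup is destructive, gated on a schema version, and returns (data, changed) so callers can skip a write when nothing happened. A stripped-down sketch with an invented version constant:

SCHEMA_VERSION = '20240101.000000'  # invented

def fixup(data):
    # Destructive upgrade; (data, False) means it was already current
    if data.get('version') == SCHEMA_VERSION:
        return data, False
    data['version'] = SCHEMA_VERSION
    sim = data.setdefault('models', {}).setdefault('simulation', {})
    sim.setdefault('simulationSerial', 0)
    data['models'].pop('simulationStatus', None)
    return data, True
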
Example #7
    def _end_job(self, job):
        """Free the slot associated with the job

        POSIT: job is locked
        """
        slot = None
        with self.__lock:
            try:
                self.__queued_jobs.remove(job)
                # No slot, just done
                return
            except ValueError:
                pass
            try:
                s = self.__running_slots[job.jid]
                if s.job == job:
                    slot = s
                    s.job = None
                    del self.__running_slots[job.jid]
            except KeyError as e:
                pkdlog(
                    '{}: PROGRAM ERROR: not in running, ignoring job: {}\n{}',
                    job.jid,
                    e,
                    pkdexc(),
                )
            if slot:
                self.__available_slots.append(slot)
                self.__event.set()
Example #8
    async def _run(self, req, op):
        try:
            if self.db.computeJobHash != req.content.computeJobHash:
                pkdlog(
                    'invalid computeJobHash self={} req={}',
                    self.db.computeJobHash,
                    req.content.computeJobHash,
                )
                return
            try:
                while True:
                    r = await op.reply_ready()
                    if r.state == job.CANCELED:
                        break
                    self.db.status = r.state
                    if self.db.status == job.ERROR:
                        self.db.error = r.get('error', '<unknown error>')
                    if 'computeJobStart' in r:
                        self.db.computeJobStart = r.computeJobStart
                    if 'parallelStatus' in r:
                        self.db.parallelStatus.update(r.parallelStatus)
                        self.db.lastUpdateTime = r.parallelStatus.lastUpdateTime
                    else:
                        # sequential jobs don't send this
                        self.db.lastUpdateTime = int(time.time())
                        #TODO(robnagler) will need final frame count
                    self.__db_write()
                    if r.state in job.EXIT_STATUSES:
                        break
            except Exception as e:
                pkdlog('error={} stack={}', e, pkdexc())
                self.db.status = job.ERROR
                self.db.error = e
        finally:
            self.destroy_op(op)
Example #9
    async def loop(self):
        while True:
            self._websocket = None
            try:
                #TODO(robnagler) connect_timeout, max_message_size, ping_interval, ping_timeout
                self._websocket = await tornado.websocket.websocket_connect(
                    tornado.httpclient.HTTPRequest(
                        url=cfg.supervisor_uri,
                        validate_cert=sirepo.job.cfg.verify_tls,
                    ),
                )
                m = self.format_op(None, job.OP_ALIVE)
                while True:
                    if m and not await self.send(m):
                        break
                    m = await self._websocket.read_message()
                    if m is None:
                        raise ValueError('response from supervisor was None')
                    m = await self._op(m)
            except Exception as e:
                pkdlog('error={} stack={}', e, pkdexc())
                # TODO(e-carlin): exponential backoff?
                await tornado.gen.sleep(_RETRY_SECS)
            finally:
                if self._websocket:
                    self._websocket.close()
Example #10
    async def _agent_start(self, op):
        if self._agent_starting_timeout:
            return
        async with self._agent_start_lock:
            # POSIT: we do not have to raise Awaited(), because
            # this is the first thing an op waits on.
            if self._agent_starting_timeout or self._websocket_ready.is_set():
                return
            try:
                t = self.cfg.agent_starting_secs
                if pkconfig.channel_in_internal_test():
                    x = op.msg.pkunchecked_nested_get('data.models.dog.favoriteTreat')
                    if x:
                        x = re.search(r'agent_start_delay=(\d+)', x)
                        if x:
                            self._agent_start_delay = int(x.group(1))
                            t += self._agent_start_delay
                            pkdlog(
                                'op={} agent_start_delay={}',
                                op,
                                self._agent_start_delay,
                            )
                pkdlog('{} {} await _do_agent_start', self, op)
                # All awaits must be after this. If a call hangs the timeout
                # handler will cancel this task
                self._agent_starting_timeout = tornado.ioloop.IOLoop.current().call_later(
                    t,
                    self._agent_starting_timeout_handler,
                )
                # POSIT: Canceled errors aren't smothered by any of the below calls
                await self.kill()
                await self._do_agent_start(op)
            except Exception as e:
                pkdlog('{} error={} stack={}', self, e, pkdexc())
                self.free_resources(internal_error='failure starting agent')
                raise
Example #11
def _gen_exception_reply_SRException(args):
    r = args.routeName
    p = args.params or PKDict()
    try:
        t = sirepo.http_request.sim_type(p.pkdel('sim_type'))
        s = simulation_db.get_schema(sim_type=t)
    except Exception as e:
        pkdc('exception={} stack={}', e, pkdexc())
        # sim_type is bad so don't cascade errors, just
        # try to get the schema without the type
        t = None
        s = simulation_db.get_schema(sim_type=None)
    # If default route or always redirect/reload
    if r:
        assert r in s.localRoutes, \
            'route={} not found in schema for type={}'.format(r, t)
    else:
        r = sirepo.uri.default_local_route_name(s)
        p = PKDict(reload_js=True)
    if (
            # must be first, to always delete reload_js
            not p.pkdel('reload_js') and flask.request.method == 'POST'
            and r not in _RELOAD_JS_ROUTES):
        pkdc('POST response={} route={} params={}', SR_EXCEPTION_STATE, r, p)
        return gen_json(
            PKDict({
                _STATE: SR_EXCEPTION_STATE,
                SR_EXCEPTION_STATE: args,
            }),
        )
    pkdc('redirect to route={} params={} type={}', r, p, t)
    return gen_redirect_for_local_route(t, route=r, params=p)
Example #12
def restrict_op_to_first_rank(op):
    """If the process has rank FIRST_RANK, call a function. Otherwise do nothing.

    Use this to call a function that would cause conflicts if called by
    multiple processes, such as writing results to a file.

    Args:
        op (function): function to call
    """
    c = None
    r = FIRST_RANK
    res = None
    try:
        import mpi4py.MPI
        c = mpi4py.MPI.COMM_WORLD
        if c.Get_size() > 1:
            r = c.Get_rank()
    except Exception:
        pass
    if r == FIRST_RANK:
        try:
            res = op()
        except Exception as e:
            pkdlog('op={} exception={} stack={}', op, e, pkdexc())
            if c:
                c.Abort(1)
            raise e
    if c:
        res = c.bcast(res, root=FIRST_RANK)
    return res
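
Typical usage passes a closure over single-writer work: FIRST_RANK runs it, every other rank blocks in bcast and receives the same return value. A hypothetical call (the file path and helper are invented):

def _write_summary():
    # Safe only on one rank: all ranks writing here would conflict
    with open('summary.txt', 'w') as f:
        f.write('done')
    return 'done'

# Every rank returns 'done'; only FIRST_RANK executed _write_summary
result = restrict_op_to_first_rank(_write_summary)
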
Example #13
    async def purge_free_simulations(cls):
        def _get_uids_and_files():
            r = []
            u = None
            p = sirepo.auth_db.UserRole.uids_of_paid_users()
            for f in pkio.sorted_glob(
                _DB_DIR.join('*' + sirepo.simulation_db.JSON_SUFFIX),
            ):
                n = sirepo.sim_data.split_jid(jid=f.purebasename).uid
                if (n in p or f.mtime() > _too_old
                        or f.purebasename in cls._purged_jids_cache):
                    continue
                if u != n:
                    # POSIT: Uid is the first part of each db file. The files are
                    # sorted so this should yield all of a user's files
                    if r:
                        yield u, r
                    u = n
                    r = []
                r.append(f)
            if r:
                yield u, r

        def _purge_sim(jid):
            d = cls.__db_load(jid)
            # OPTIMIZATION: We assume uids_of_paid_users doesn't change very
            # frequently, so we don't need to check it again. A user could run
            # a sim at any time, though, so we do need to check lastUpdateTime.
            if d.lastUpdateTime > _too_old:
                return
            cls._purged_jids_cache.add(jid)
            if d.status == job.JOB_RUN_PURGED:
                return
            p = sirepo.simulation_db.simulation_run_dir(d)
            pkio.unchecked_remove(p)
            n = cls.__db_init_new(d, d)
            n.status = job.JOB_RUN_PURGED
            cls.__db_write_file(n)

        if not cfg.purge_non_premium_task_secs:
            return
        s = sirepo.srtime.utc_now()
        u = None
        f = None
        try:
            _too_old = (sirepo.srtime.utc_now_as_int() -
                        cfg.purge_non_premium_after_secs)
            with sirepo.auth_db.session():
                for u, v in _get_uids_and_files():
                    with sirepo.auth.set_user_outside_of_http_request(u):
                        for f in v:
                            _purge_sim(jid=f.purebasename)
                    await tornado.gen.sleep(0)
        except Exception as e:
            pkdlog('u={} f={} error={} stack={}', u, f, e, pkdexc())
        finally:
            tornado.ioloop.IOLoop.current().call_later(
                cfg.purge_non_premium_task_secs,
                cls.purge_free_simulations,
            )
Example #14
def _do_download_data_file(msg, template):
    try:
        r = template.get_data_file(
            msg.runDir,
            msg.analysisModel,
            msg.frame,
            options=PKDict(suffix=msg.suffix),
        )
        if not isinstance(r, PKDict):
            if isinstance(r, str):
                r = msg.runDir.join(r, abs=1)
            r = PKDict(filename=r)
        u = r.get('uri')
        if u is None:
            u = r.filename.basename
        c = r.get('content')
        if c is None:
            c = (
                pkcompat.to_bytes(pkio.read_text(r.filename))
                if u.endswith(('py', 'txt', 'csv'))
                else r.filename.read_binary()
            )
        requests.put(
            msg.dataFileUri + u,
            data=c,
            verify=job.cfg.verify_tls,
        ).raise_for_status()
        return PKDict()
    except Exception as e:
        return PKDict(state=job.ERROR, error=e, stack=pkdexc())
Example #15
def test_importer(import_req):
    from pykern import pkcollections
    from pykern import pkjson
    from pykern.pkunit import pkeq
    from sirepo.template import zgoubi
    import sirepo.sim_data

    with pkunit.save_chdir_work() as w:
        for fn in pkio.sorted_glob(pkunit.data_dir().join('*.dat')):
            error = None
            try:
                data = zgoubi.import_file(import_req(fn), unit_test_mode=True)
                sirepo.sim_data.get_class('zgoubi').fixup_old_data(data)
                #TODO(pjm): easier way to convert nested dict to pkcollections.Dict?
                data = pkcollections.json_load_any(pkjson.dump_pretty(data))
            except Exception as e:
                pkdlog(pkdexc())
                error = str(e)
            if error:
                actual = error
            else:
                actual = zgoubi.python_source_for_model(data)
            outfile = fn.basename + '.txt'
            pkio.write_text(outfile, actual)
            e = pkunit.data_dir().join(outfile)
            expect = pkio.read_text(e)
            pkeq(expect, actual, 'diff {} {}', e, w.join(outfile))
Example #16
def test_import():
    from pykern import pkjson
    from pykern.pkunit import pkeq
    from sirepo.template import flash_parser
    import re

    def _parse_config(fn):
        return flash_parser.ConfigParser().parse(pkio.read_text(fn))

    def _parse_par(fn):
        data_file = fn.basename.replace('-flash.par', '')
        return flash_parser.ParameterParser().parse(
            pkjson.load_any(
                pkio.read_text(
                    pkunit.data_dir().join(f'{data_file}-sirepo-data.json'))),
            pkio.read_text(fn),
        )

    with pkunit.save_chdir_work():
        for fn in pkio.sorted_glob(pkunit.data_dir().join('*')):
            if re.search(r'-Config$', fn.basename):
                parser = _parse_config
            elif re.search(r'flash.par$', fn.basename):
                parser = _parse_par
            else:
                continue
            try:
                actual = pkjson.dump_pretty(parser(fn))
            except Exception as e:
                pkdlog(pkdexc())
                actual = str(e)
            outfile = f'{fn.basename}.out'
            pkio.write_text(outfile, actual)
            expect = pkio.read_text(pkunit.data_dir().join(outfile))
            pkeq(expect, actual)
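
Both importer tests follow pykern's golden-file idiom: compute the actual output inside a scratch work dir, write it out, then pkeq it against the stored expectation so a failure leaves two files to diff. The skeleton, with a stand-in for the real parser:

from pykern import pkio, pkunit
from pykern.pkunit import pkeq

def test_render():
    with pkunit.save_chdir_work():
        for fn in pkio.sorted_glob(pkunit.data_dir().join('*.in')):
            actual = pkio.read_text(fn).upper()  # stand-in for the parser
            out = fn.basename + '.out'
            pkio.write_text(out, actual)
            pkeq(pkio.read_text(pkunit.data_dir().join(out)), actual)
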
Example #17
def api_runCancel():
    jid = None
    try:
        req = http_request.parse_post(id=True,
                                      model=True,
                                      check_sim_exists=True)
        jid = req.sim_data.parse_jid(req.req_data)
        # TODO(robnagler) need to have a way of listing jobs
        # Don't bother with cache_hit check. We don't have any way of canceling
        # if the parameters don't match so for now, always kill.
        #TODO(robnagler) mutex required
        if runner.job_is_processing(jid):
            run_dir = simulation_db.simulation_run_dir(req.req_data)
            # Write first, since results are write once, and we want to
            # indicate the cancel instead of the termination error that
            # will happen as a result of the kill.
            try:
                simulation_db.write_result({'state': 'canceled'},
                                           run_dir=run_dir)
            except Exception as e:
                if not pykern.pkio.exception_is_not_found(e):
                    raise
                # else: run_dir may have been deleted
            runner.job_kill(jid)
            # TODO(robnagler) should really be inside the template (t.cancel_simulation()?)
            # the last frame file may not be finished, remove it
            t = sirepo.template.import_module(req.req_data)
            if hasattr(t, 'remove_last_frame'):
                t.remove_last_frame(run_dir)
    except Exception as e:
        pkdlog('ignoring exception={} jid={} stack={}', e, jid, pkdexc())
    # Always true from the client's perspective
    return http_reply.gen_json({'state': 'canceled'})
Example #18
    async def loop(self):
        while True:
            self._websocket = None
            try:
                #TODO(robnagler) connect_timeout, ping_interval, ping_timeout
                self._websocket = await tornado.websocket.websocket_connect(
                    tornado.httpclient.HTTPRequest(
                        url=cfg.supervisor_uri,
                        validate_cert=sirepo.job.cfg.verify_tls,
                    ),
                    max_message_size=job.cfg.max_message_bytes,
                    ping_interval=job.cfg.ping_interval_secs,
                    ping_timeout=job.cfg.ping_timeout_secs,
                )
                s = self.format_op(None, job.OP_ALIVE)
                while True:
                    if s and not await self.send(s):
                        break
                    r = await self._websocket.read_message()
                    if r is None:
                        pkdlog(
                            'websocket closed in response to len={} send={}',
                            s and len(s),
                            s,
                        )
                        raise tornado.iostream.StreamClosedError()
                    s = await self._op(r)
            except Exception as e:
                pkdlog('error={} stack={}', e, pkdexc())
                # TODO(e-carlin): exponential backoff?
                await tornado.gen.sleep(_RETRY_SECS)
            finally:
                if self._websocket:
                    self._websocket.close()
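
Examples 9 and 18 share the same outer shape: connect, serve until the stream dies, log, sleep, retry. A skeletal version that also sketches the exponential backoff the TODO asks about (pure asyncio; connect and serve are caller-supplied coroutines):

import asyncio
from pykern.pkdebug import pkdexc, pkdlog

async def reconnect_loop(connect, serve, base_delay=1, max_delay=60):
    delay = base_delay
    while True:
        conn = None
        try:
            conn = await connect()
            delay = base_delay  # reset backoff after a good connection
            await serve(conn)
        except Exception as e:
            pkdlog('error={} stack={}', e, pkdexc())
            await asyncio.sleep(delay)
            delay = min(delay * 2, max_delay)
        finally:
            if conn:
                conn.close()
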
Example #19
def api_importFile(simulation_type):
    """
    Args:
        simulation_type (str): which simulation type
    Params:
        file: file data
        folder: where to import to
    """
    import sirepo.importer

    error = None
    f = None

    try:
        f = flask.request.files.get('file')
        if not f:
            raise sirepo.util.Error('must supply a file')
        req = http_request.parse_params(
            filename=f.filename,
            folder=flask.request.form.get('folder'),
            id=flask.request.form.get('simulationId'),
            template=True,
            type=simulation_type,
        )
        req.file_stream = f.stream
        req.import_file_arguments = flask.request.form.get('arguments', '')

        def s(data):
            data.models.simulation.folder = req.folder
            data.models.simulation.isExample = False
            return _save_new_and_reply(data)

        if pkio.has_file_extension(req.filename, 'json'):
            data = sirepo.importer.read_json(req.file_stream.read(), req.type)
        #TODO(pjm): need a separate URI interface to importer, added exception for rs4pi for now
        # (dicom input is normally a zip file)
        elif pkio.has_file_extension(req.filename, 'zip') and req.type != 'rs4pi':
            data = sirepo.importer.read_zip(req.file_stream, sim_type=req.type)
        else:
            if not hasattr(req.template, 'import_file'):
                raise sirepo.util.Error('Only zip files are supported')
            with simulation_db.tmp_dir() as d:
                data = req.template.import_file(req, tmp_dir=d, reply_op=s)
            if 'error' in data:
                return http_reply.gen_json(data)
        return s(data)
    except werkzeug.exceptions.HTTPException:
        raise
    except sirepo.util.Reply:
        raise
    except Exception as e:
        pkdlog('{}: exception: {}', f and f.filename, pkdexc())
        #TODO(robnagler) security issue here. Really don't want to report errors to user
        error = str(e.args) if hasattr(e, 'args') else str(e)
    return http_reply.gen_json({
        'error': error if error else 'An unknown error occurred',
    })
Example #20
@contextlib.contextmanager
def _catch_and_log_errors(exc_type, msg, *args, **kwargs):
    try:
        yield
    except trio.MultiError as multi_exc:
        raise AssertionError('handle MultiErrors in _catch_and_log_errors')
    except exc_type:
        pkdlog(msg, *args, **kwargs)
        pkdlog(pkdexc())
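
With the contextlib.contextmanager decorator in place, the generator is a context manager: exceptions matching exc_type are logged (message plus traceback) and suppressed, anything else propagates. Hypothetical usage:

# The KeyError is logged and swallowed; a TypeError would still
# propagate out of the with block
with _catch_and_log_errors(KeyError, 'missing key for job={}', 'job-1'):
    {}['missing']
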
Example #21
def _run_shadow():
    """Run shadow program with isolated locals()
    """
    try:
        exec(_script(), locals(), locals())
    except Exception:
        pkdlog('script={} error={}', _script(), pkdexc())
    # 'beam' is expected to be defined by the exec'd script via locals()
    return beam
Example #22
    def on_close(self):
        try:
            d = getattr(self, 'sr_driver', None)
            if d:
                del self.sr_driver
                d.websocket_on_close()
        except Exception as e:
            pkdlog('error={} {}', e, pkdexc())
Example #23
def start_sbatch():
    def get_host():
        h = socket.gethostname()
        if '.' not in h:
            h = socket.getfqdn()
        return h

    def kill_agent(pid_file):
        if get_host() == pid_file.host:
            os.kill(pid_file.pid, signal.SIGKILL)
        else:
            try:
                subprocess.run(
                    ('ssh', pid_file.host, 'kill', '-KILL', str(pid_file.pid)),
                    capture_output=True,
                    text=True,
                ).check_returncode()
            except subprocess.CalledProcessError as e:
                if '({}) - No such process'.format(pid_file.pid) not in e.stderr:
                    pkdlog(
                        'cmd={cmd} returncode={returncode} stderr={stderr}',
                        **vars(e),
                    )

    f = None
    try:
        f = pkjson.load_any(pkio.py_path(_PID_FILE))
    except Exception as e:
        if not pkio.exception_is_not_found(e):
            pkdlog('error={} stack={}', e, pkdexc())
    try:
        if f:
            kill_agent(f)
    except Exception as e:
        pkdlog('error={} stack={}', e, pkdexc())
    pkjson.dump_pretty(
        PKDict(
            host=get_host(),
            pid=os.getpid(),
        ),
        _PID_FILE,
    )
    try:
        start()
    finally:
        #TODO(robnagler) https://github.com/radiasoft/sirepo/issues/2195
        pkio.unchecked_remove(_PID_FILE)
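
Reduced to its skeleton, start_sbatch is a pid-file lifecycle: best-effort kill of any previous instance, record ourselves, run, and always remove the marker. A sketch of just the lifecycle, with an invented path and the previous-instance handling omitted:

import os
from pykern import pkio, pkjson
from pykern.pkcollections import PKDict

_PID_FILE = 'agent.json'  # invented

def single_instance(start):
    # Write our pid, run, and always remove the marker on the way out
    pkjson.dump_pretty(PKDict(pid=os.getpid()), _PID_FILE)
    try:
        start()
    finally:
        pkio.unchecked_remove(_PID_FILE)
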
Example #24
def import_python(code, tmp_dir, user_filename=None, arguments=None):
    """Converts script_text into json and stores as new simulation.

    Avoids too much data back to the user in the event of an error.
    This could be a potential security issue, because the script
    could be used to probe the system.

    Args:
        code (str): Python code that runs SRW
        tmp_dir (str): directory in which to write and parse the script
        user_filename (str): uploaded file name for log
        arguments (str): argv to be passed to script

    Returns:
        dict: simulation data
    """
    script = None

    # Patch for the mirror profile for the exported .py file from Sirepo:
    code = _patch_mirror_profile(code)

    try:
        with pkio.save_chdir(tmp_dir):
            # This string won't show up anywhere
            script = pkio.write_text(
                'in.py',
                re.sub(r'^main\(', '#', code, flags=re.MULTILINE),
            )
            o = SRWParser(
                script,
                user_filename=user_filename,
                arguments=arguments,
            )
            return o.data
    except Exception as e:
        lineno = script and _find_line_in_trace(script)
        if hasattr(e, 'args'):
            if len(e.args) == 1:
                m = str(e.args[0])
            elif e.args:
                m = str(e.args)
            else:
                m = e.__class__.__name__
        else:
            m = str(e)
        pkdlog(
            'Error: {}; exception={}; script={}; filename={}; stack:\n{}',
            m,
            e.__class__.__name__,
            script,
            user_filename,
            pkdexc(),
        )
        m = m[:50]
        raise ValueError(
            'Error on line {}: {}'.format(lineno, m) if lineno
            else 'Error: {}'.format(m),
        )
Example #25
    def run(self):
        """Start jobs if slots available else check for available"""
        pkdlog(
            '{}: {} available={}',
            self.name,
            self.__kind,
            len(self.__available_slots),
        )
        while True:
            self.__event.wait(_SLOT_MANAGER_POLL_SECS)
            got_one = False
            while True:
                with self.__lock:
                    self.__event.clear()
                    if not (self.__queued_jobs and self.__available_slots):
                        if self.__queued_jobs:
                            pkdlog(
                                'waiting: queue={} available={}',
                                [x.jid for x in self.__queued_jobs],
                                [str(x) for x in self.__available_slots],
                            )
                        break
                    j = self.__queued_jobs.pop(0)
                    s = self.__available_slots.pop(0)
                    s.job = j
                    self.__running_slots[j.jid] = s
                # have to release slot lock before locking job
                try:
                    with j.lock:
                        if j._is_state_ok_to_start():
                            j._slot_start(s)
                            got_one = True
                except Exception as e:
                    j._error_during_start(e, pkdexc())
                    try:
                        j.kill()
                    except Exception as e:
                        pkdlog(
                            '{}: error during cleanup after error: {}\n{}',
                            j.jid,
                            e,
                            pkdexc(),
                        )
            if not got_one:
                self._poll_running_jobs()
Example #26
    @contextlib.contextmanager
    def set_job_status(self, status):
        self.computeJob.set_status(self, status)
        try:
            yield
            self.computeJob.set_status(self, None)
        except Exception as e:
            pkdlog('{} status={} stack={}', self, status, pkdexc())
            self.computeJob.set_status(self, None, exception=e)
            raise
Example #27
def _do_get_simulation_frame(msg, template):
    try:
        return template_common.sim_frame_dispatch(
            msg.data.copy().pkupdate(run_dir=msg.runDir),
        )
    except Exception as e:
        r = 'report not generated'
        if isinstance(e, sirepo.util.UserAlert):
            r = e.sr_args.error
        return PKDict(state=job.ERROR, error=r, stack=pkdexc())
Example #28
    @contextlib.contextmanager
    def set_job_situation(self, situation):
        self.computeJob.set_situation(self, situation)
        try:
            yield
            self.computeJob.set_situation(self, None)
        except Exception as e:
            pkdlog('{} situation={} stack={}', self, situation, pkdexc())
            self.computeJob.set_situation(self, None, exception=e)
            raise
Example #29
def call_api(func_or_name, kwargs=None, data=None):
    """Call another API with permission checks.

    Note: also calls `save_to_cookie`.

    Args:
        func_or_name (object): api function or name (without `api_` prefix)
        kwargs (dict): to be passed to API [None]
        data (dict): will be returned by `http_request.parse_json`
    Returns:
        flask.Response: result
    """
    p = None
    s = None
    try:
        # must be first so exceptions have access to sim_type
        if kwargs:
            # Any (GET) uri will have simulation_type in uri if it is application
            # specific.
            s = sirepo.http_request.set_sim_type(kwargs.get('simulation_type'))
        f = (
            func_or_name if callable(func_or_name)
            else _api_to_route[func_or_name].func
        )
        sirepo.api_auth.check_api_call(f)
        try:
            if data:
                p = sirepo.http_request.set_post(data)
            r = flask.make_response(f(**kwargs) if kwargs else f())
        finally:
            if data:
                sirepo.http_request.set_post(p)
    except Exception as e:
        if isinstance(e, (sirepo.util.Reply, werkzeug.exceptions.HTTPException)):
            pkdc('api={} exception={} stack={}', func_or_name, e, pkdexc())
        else:
            pkdlog('api={} exception={} stack={}', func_or_name, e, pkdexc())
        r = sirepo.http_reply.gen_exception(e)
    finally:
        # http_request tries to keep a valid sim_type so
        # this is ok to call (even if s is None)
        sirepo.http_request.set_sim_type(s)
    sirepo.cookie.save_to_cookie(r)
    sirepo.events.emit('end_api_call', PKDict(resp=r))
    return r
Example #30
    async def purge_free_simulations(cls, init=False):
        def _get_uids_and_files():
            r = []
            u = None
            p = sirepo.auth_db.UserRole.uids_of_paid_users()
            for f in pkio.sorted_glob(
                _DB_DIR.join('*' + sirepo.simulation_db.JSON_SUFFIX),
            ):
                n = sirepo.sim_data.uid_from_jid(f.basename)
                if n in p or f.mtime() > _too_old:
                    continue
                if u != n:
                    # POSIT: Uid is the first part of each db file. The files are
                    # sorted so this should yield all of a user's files
                    if r:
                        yield u, r
                    u = n
                    r = []
                r.append(f)
            if r:
                yield u, r

        def _purge_sim(db_file):
            d = pkcollections.json_load_any(db_file)
            # OPTIMIZATION: We assume uids_of_paid_users doesn't change very
            # frequently, so we don't need to check it again. A user could run
            # a sim at any time, though, so we do need to check lastUpdateTime.
            if d.lastUpdateTime > _too_old:
                return
            if d.status == job.FREE_USER_PURGED:
                return
            p = sirepo.simulation_db.simulation_run_dir(d)
            pkio.unchecked_remove(p)
            d.status = job.FREE_USER_PURGED
            cls.__db_write_file(d)
            jids_purged.append(db_file.purebasename)

        s = sirepo.srtime.utc_now()
        u = None
        f = None
        try:
            _too_old = sirepo.srtime.utc_now_as_float() - (
                cfg.purge_free_after_days * 24 * 60 * 60
            )

            jids_purged = []
            for u, v in _get_uids_and_files():
                with sirepo.auth.set_user(u):
                    for f in v:
                        _purge_sim(f)
                await tornado.gen.sleep(0)
            pkdlog('jids={}', jids_purged)
        except Exception as e:
            pkdlog('u={} f={} error={} stack={}', u, f, e, pkdexc())
        finally:
            cls._purge_free_simulations_set(s, init)
Example #31
    def _start(self):
        """Detach a process from the controlling terminal and run it in the
        background as a daemon.

        We don't use pksubprocess. This method is not called from the MainThread
        so can't set signals.
        """
        env = _safe_env()
        env['SIREPO_MPI_CORES'] = str(mpi.cfg.cores)
        try:
            pid = os.fork()
        except OSError as e:
            pkdlog('{}: fork OSError: {} errno={}', self.jid, e.strerror, e.errno)
            raise
        if pid != 0:
            pkdlog('{}: started: pid={} cmd={}', self.jid, pid, self.cmd)
            self.__pid = pid
            return
        try:
            os.chdir(str(self.run_dir))
            #Don't os.setsid() so signals propagate properly
            maxfd = resource.getrlimit(resource.RLIMIT_NOFILE)[1]
            if maxfd == resource.RLIM_INFINITY:
                maxfd = runner.MAX_OPEN_FILES
            for fd in range(0, maxfd):
                try:
                    os.close(fd)
                except OSError:
                    pass
            sys.stdin = open(template_common.RUN_LOG, 'a+')
            assert sys.stdin.fileno() == 0
            os.dup2(0, 1)
            sys.stdout = os.fdopen(1, 'a+')
            os.dup2(0, 2)
            sys.stderr = os.fdopen(2, 'a+')
            pkdlog('{}: child will exec: {}', self.jid, self.cmd)
            sys.stderr.flush()
            try:
                simulation_db.write_status('running', self.run_dir)
                os.execvpe(self.cmd[0], self.cmd, env=env)
            except BaseException as e:
                pkdlog(
                    '{}: execvp error: {} errno={}',
                    self.jid,
                    e.strerror if hasattr(e, 'strerror') else '',
                    e.errno if hasattr(e, 'errno') else '',
                )
            finally:
                sys.exit(1)
        except BaseException as e:
            # NOTE: there's no lock here so just append to the log. This
            # really shouldn't happen, but it might (out of memory) so just
            # log to the run log and hope somebody notices
            self._error_during_start(e, pkdexc())
            raise
Example #32
def read_result(run_dir):
    """Read result data file from simulation

    Args:
        run_dir (py.path): where to find output

    Returns:
        dict: result or describes error
    """
    fn = json_filename(template_common.OUTPUT_BASE_NAME, run_dir)
    res = None
    err = None
    try:
        res = read_json(fn)
    except Exception as e:
        pkdc('{}: exception={}', fn, e)
        err = pkdexc()
        if pkio.exception_is_not_found(e):
            #TODO(robnagler) change POSIT matches _SUBPROCESS_ERROR_RE
            err = 'ERROR: Terminated unexpectedly'
            # Not found so return run.log as err
            rl = run_dir.join(template_common.RUN_LOG)
            try:
                e = pkio.read_text(rl)
                if _RUN_LOG_CANCEL_RE.search(e):
                    err = None
                elif e:
                    err = e
            except Exception as e:
                if not pkio.exception_is_not_found(e):
                    pkdlog('{}: error reading log: {}', rl, pkdexc())
        else:
            pkdlog('{}: error reading output: {}', fn, err)
    if err:
        return None, err
    if not res:
        res = {}
    if 'state' not in res:
        # Old simulation or other error, just say is canceled so restarts
        res = {'state': 'canceled'}
    return res, None
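
The error handling splits on pkio.exception_is_not_found: a missing result file means the job died before writing output, so the run log becomes the error text. The core decision as a sketch (file names invented):

from pykern import pkio
from pykern.pkdebug import pkdexc

def read_status(run_dir):
    # Returns (text, error); on not-found, fall back to the run log
    try:
        return pkio.read_text(run_dir.join('result.json')), None
    except Exception as e:
        if not pkio.exception_is_not_found(e):
            return None, pkdexc()
        try:
            return None, pkio.read_text(run_dir.join('run.log'))
        except Exception:
            return None, 'terminated unexpectedly'
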
Example #33
    def _repo(self, repo):
        fn = repo.full_name
        bd = re.sub('/', '-', fn)

        def _clone(suffix):
            base = bd + suffix
            for cmd in [
                [
                    'git', 'clone', '--quiet', '--mirror',
                    _GITHUB_URI + '/' + fn + suffix,
                    base,
                ],
                ['tar', 'cJf', base + '.txz', base],
            ]:
                _shell(cmd)
            pkio.unchecked_remove(base)

        def _json(gen, suffix):
            base = bd + suffix
            with open(base, 'wt') as f:
                sep = '['
                for i in gen:
                    f.write(sep)
                    j = i.as_json()
                    assert json.loads(j)
                    f.write(j)
                    sep = ','
                if sep == '[':
                    # Empty iteration
                    f.write(sep)
                f.write(']')
            _shell(['xz', base])

        try:
            _clone('.git')
            if repo.has_issues:
                _json(repo.issues(state='all'), '.issues')
            if repo.has_wiki:
                try:
                    _clone('.wiki.git')
                except subprocess.CalledProcessError as e:
                    if not re.search(_WIKI_ERROR_OK, str(e.output)):
                        raise
            _json(repo.comments(), '.comments')
        except Exception as e:
            pkdlog(
                'ERROR: {} {} {} {} {}',
                fn,
                type(e),
                e,
                getattr(e, 'output', None),
                pkdexc(),
            )
Example #34
    def _from_cookie_header(self, header):
        global _try_beaker_compat

        s = None
        err = None
        try:
            match = re.search(r'\b{}=([^;]+)'.format(cfg.http_name), header)
            if match:
                s = self._decrypt(match.group(1))
                self.update(self._deserialize(s))
                self.incoming_serialized = s
                set_log_user(self.get(_COOKIE_USER))
                return
        except Exception as e:
            if 'crypto' in type(e).__module__:
                # cryptography module exceptions serialize to empty string
                # so just report the type.
                e = type(e)
            err = e
            pkdc(pkdexc())
            # wait for decoding errors until after beaker attempt
        if not self.get(_COOKIE_SENTINEL) and _try_beaker_compat:
            try:
                import sirepo.beaker_compat

                res = sirepo.beaker_compat.update_session_from_cookie_header(header)
                if res is not None:
                    self.clear()
                    self.set_sentinel()
                    self.update(res)
                    err = None
                    set_log_user(self.get(_COOKIE_USER))
            except AssertionError:
                pkdlog('Unconfiguring beaker_compat: {}', pkdexc())
                _try_beaker_compat = False

        if err:
            pkdlog('Cookie decoding failed: {} value={}', err, s)
Example #35
def open_json_file(sim_type, path=None, sid=None, fixup=True):
    """Read a db file and return result

    Args:
        sim_type (str): simulation type (app)
        path (py.path.local): where to read the file
        sid (str): simulation id

    Returns:
        dict: data

    Raises:
        CopyRedirect: if the simulation is in another user's account
    """
    if not path:
        path = sim_data_file(sim_type, sid)
    if not os.path.isfile(str(path)):
        global_sid = None
        if sid:
            #TODO(robnagler) workflow should be in server.py,
            # because only valid in one case, not e.g. for opening examples
            # which are not found.
            user_copy_sid = _find_user_simulation_copy(sim_type, sid)
            if find_global_simulation(sim_type, sid):
                global_sid = sid
        if global_sid:
            raise CopyRedirect({
                'redirect': {
                    'simulationId': global_sid,
                    'userCopySimulationId': user_copy_sid,
                },
            })
        util.raise_not_found(
            '{}/{}: global simulation not found',
            sim_type,
            sid,
        )
    data = None
    try:
        with open(str(path)) as f:
            data = json_load(f)
            # ensure the simulationId matches the path
            if sid:
                data['models']['simulation']['simulationId'] = _sid_from_path(path)
    except Exception as e:
        pkdlog('{}: error: {}', path, pkdexc())
        raise
    return fixup_old_data(data)[0] if fixup else data
Example #36
def import_python(code, tmp_dir, lib_dir, user_filename=None, arguments=None):
    """Converts script_text into json and stores as new simulation.

    Avoids too much data back to the user in the event of an error.
    This could be a potential security issue, because the script
    could be used to probe the system.

    Args:
        code (str): Python code that runs SRW
        tmp_dir (str): directory in which to write and parse the script
        lib_dir (str): simulation lib dir
        user_filename (str): uploaded file name for log
        arguments (str): argv to be passed to script

    Returns:
        dict: simulation data
    """
    script = None

    # Patch for the mirror profile for the exported .py file from Sirepo:
    code = _patch_mirror_profile(code, lib_dir)

    try:
        with pkio.save_chdir(tmp_dir):
            # This string won't show up anywhere
            script = pkio.write_text('in.py', code)
            o = SRWParser(
                script,
                lib_dir=py.path.local(lib_dir),
                user_filename=user_filename,
                arguments=arguments,
            )
            return o.data
    except Exception as e:
        lineno = script and _find_line_in_trace(script)
        # Avoid sending too much data back to the user
        pkdlog(
            'Error: {}; exception={}; script={}; filename={}; stack:\n{}',
            str(e),
            e.__class__.__name__,
            script,
            user_filename,
            pkdexc(),
        )
        e = str(e)[:50]
        raise ValueError(
            'Error on line {}: {}'.format(lineno, e) if lineno
            else 'Error: {}'.format(e))
Example #37
def test_importer():
    from pykern import pkcollections
    from pykern import pkio
    from pykern.pkunit import pkeq
    from sirepo.template import elegant

    with pkunit.save_chdir_work():
        for fn in pkio.sorted_glob(pkunit.data_dir().join('*')):
            if (not pkio.has_file_extension(fn, ('ele', 'lte'))
                    or fn.basename.endswith('ele.lte')):
                continue
            error = None
            try:
                data = elegant.import_file(FlaskRequest(fn))
            except Exception as e:
                pkdlog(pkdexc())
                error = str(e)
            if error:
                actual = error
            else:
                if pkio.has_file_extension(fn, 'lte'):
                    data['models']['commands'] = []
                    actual = '{}{}'.format(
                        elegant._generate_variables(data),
                        elegant.generate_lattice(
                            data,
                            elegant._build_filename_map(data),
                            elegant._build_beamline_map(data),
                            pkcollections.Dict(),
                        ),
                    )
                else:
                    data2 = elegant.import_file(
                        FlaskRequest('{}.lte'.format(fn)),
                        test_data=data,
                    )
                    actual = elegant._generate_commands(
                        data2,
                        elegant._build_filename_map(data2),
                        elegant._build_beamline_map(data2),
                        pkcollections.Dict(),
                    )
            outfile = fn.basename + '.txt'
            pkio.write_text(outfile, actual)
            expect = pkio.read_text(pkunit.data_dir().join(outfile))
            #TODO(pjm): this takes too long if there are a lot of diffs
            #assert expect == actual
            pkeq(expect, actual)
Example #38
def fixup_old_data(data, force=False):
    """Upgrade data to latest schema and updates version.

    Args:
        data (dict): to be updated (destructively)
        force (bool): force validation

    Returns:
        dict: upgraded `data`
        bool: True if data changed
    """
    try:
        if not force and 'version' in data and data.version == SCHEMA_COMMON.version:
            return data, False
        try:
            data.fixup_old_version = data.version
        except AttributeError:
            data.fixup_old_version = _OLDEST_VERSION
        data.version = SCHEMA_COMMON.version
        if 'simulationType' not in data:
            if 'sourceIntensityReport' in data.models:
                data.simulationType = 'srw'
            elif 'fieldAnimation' in data.models:
                data.simulationType = 'warppba'
            elif 'bunchSource' in data.models:
                data.simulationType = 'elegant'
            else:
                pkdlog('simulationType: not found; data={}', data)
                raise AssertionError('must have simulationType')
        elif data.simulationType == 'warp':
            data.simulationType = 'warppba'
        elif data.simulationType == 'fete':
            data.simulationType = 'warpvnd'
        if 'simulationSerial' not in data.models.simulation:
            data.models.simulation.simulationSerial = 0
        sirepo.template.import_module(data.simulationType).fixup_old_data(data)
        pkcollections.unchecked_del(data.models, 'simulationStatus')
        pkcollections.unchecked_del(data, 'fixup_old_version')
        return data, True
    except Exception as e:
        pkdlog('{}: error: {}', data, pkdexc())
        raise
Example #39
def _dispatch(path):
    """Called by Flask and routes the base_uri with parameters

    Args:
        path (str): what to route

    Returns:
        flask.Response: response
    """
    cookie.init()
    try:
        if path is None:
            return _dispatch_call(_empty_route.func, {})
        # werkzeug doesn't convert '+' to ' '
        parts = re.sub(r'\+', ' ', path).split('/')
        try:
            route = _uri_to_route[parts[0]]
            parts.pop(0)
        except KeyError:
            route = _default_route
        kwargs = pkcollections.Dict()
        for p in route.params:
            if not parts:
                if not p.is_optional:
                    raise NotFound('{}: uri missing parameter ({})', path, p.name)
                break
            if p.is_path_info:
                kwargs[p.name] = '/'.join(parts)
                parts = None
                break
            kwargs[p.name] = parts.pop(0)
        if parts:
            raise NotFound('{}: unknown parameters in uri ({})', parts, path)
        return _dispatch_call(route.func, kwargs)
    except NotFound as e:
        util.raise_not_found(e.log_fmt, *e.args, **e.kwargs)
    except Exception as e:
        pkdlog('{}: error: {}', path, pkdexc())
        raise
Example #40
def _simulation_run_status(data, quiet=False):
    """Look for simulation status and output

    Args:
        data (dict): request
        quiet (bool): don't write errors to log

    Returns:
        dict: status response
    """
    try:
        #TODO(robnagler): Lock
        rep = simulation_db.report_info(data)
        is_processing = cfg.job_queue.is_processing(rep.job_id)
        is_running = rep.job_status in _RUN_STATES
        res = {'state': rep.job_status}
        pkdc(
            '{}: is_processing={} is_running={} state={} cached_data={}',
            rep.job_id,
            is_processing,
            is_running,
            rep.job_status,
            bool(rep.cached_data),
        )
        if is_processing and not is_running:
            cfg.job_queue.race_condition_reap(rep.job_id)
            pkdc('{}: is_processing and not is_running', rep.job_id)
            is_processing = False
        if is_processing:
            if not rep.cached_data:
                return _simulation_error(
                    'input file not found, but job is running',
                    rep.input_file,
                )
        else:
            is_running = False
            if rep.run_dir.exists():
                res, err = simulation_db.read_result(rep.run_dir)
                if err:
                    return _simulation_error(err, 'error in read_result', rep.run_dir)
        if simulation_db.is_parallel(data):
            template = sirepo.template.import_module(data)
            new = template.background_percent_complete(
                rep.model_name,
                rep.run_dir,
                is_running,
                simulation_db.get_schema(data['simulationType']),
            )
            new.setdefault('percentComplete', 0.0)
            new.setdefault('frameCount', 0)
            res.update(new)
        res['parametersChanged'] = rep.parameters_changed
        if res['parametersChanged']:
            pkdlog(
                '{}: parametersChanged=True req_hash={} cached_hash={}',
                rep.job_id,
                rep.req_hash,
                rep.cached_hash,
            )
        #TODO(robnagler) verify serial number to see what's newer
        res.setdefault('startTime', _mtime_or_now(rep.input_file))
        res.setdefault('lastUpdateTime', _mtime_or_now(rep.run_dir))
        res.setdefault('elapsedTime', res['lastUpdateTime'] - res['startTime'])
        if is_processing:
            res['nextRequestSeconds'] = simulation_db.poll_seconds(rep.cached_data)
            res['nextRequest'] = {
                'report': rep.model_name,
                'reportParametersHash': rep.cached_hash,
                'simulationId': rep.cached_data['simulationId'],
                'simulationType': rep.cached_data['simulationType'],
            }
        pkdc(
            '{}: processing={} state={} cache_hit={} cached_hash={} data_hash={}',
            rep.job_id,
            is_processing,
            res['state'],
            rep.cache_hit,
            rep.cached_hash,
            rep.req_hash,
        )
    except Exception:
        return _simulation_error(pkdexc(), quiet=quiet)
    return res
Example #41
def check_call_with_signals(cmd, output=None, env=None, msg=None):
    """Run cmd, writing to output.

    stdin is `os.devnull`.

    Passes SIGTERM and SIGINT on to the child process. If `output`
    is a string, it will be opened in write ('w') mode.

    Args:
        cmd (list): passed to subprocess verbatim
        output (file or str): where to write stdout and stderr
        env (dict): environment to use
        msg (func): optional logger, called as msg(fmt, *args)
    """
    assert _is_main_thread(), \
        'subprocesses which require signals need to be started in main thread'
    p = None
    prev_signal = {sig: signal.getsignal(sig) for sig in _SIGNALS}

    def signal_handler(sig, frame):
        if p:
            p.send_signal(sig)
        ps = prev_signal[sig]
        if ps in (None, signal.SIG_IGN, signal.SIG_DFL):
            return
        ps(sig, frame)

    pid = None
    try:
        stdout = output
        if isinstance(output, six.string_types):
            stdout = open(output, 'w')
        stderr = subprocess.STDOUT if stdout else None
        for sig in _SIGNALS:
            signal.signal(sig, signal_handler)
        p = subprocess.Popen(
            cmd,
            stdin=open(os.devnull),
            stdout=stdout,
            stderr=stderr,
            env=env,
        )
        pid = p.pid
        if msg:
            msg('{}: started: {}', pid, cmd)
        rc = p.wait()
        p = None
        if rc != 0:
            raise RuntimeError('error exit({})'.format(rc))
        if msg:
            msg('{}: normal exit(0): {}', pid, cmd)
    except Exception as e:
        if msg:
            msg('{}: exception: {} {}', pid, cmd, pkdexc())
        raise
    finally:
        for sig in _SIGNALS:
            signal.signal(sig, prev_signal[sig])
        if p is not None:
            if msg:
                msg('{}: terminating: {}', pid, cmd)
            p.terminate()
        if stdout != output:
            stdout.close()
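
The heart of check_call_with_signals is the handler swap: remember the previous handlers, forward SIGTERM and SIGINT to the child while it runs, then restore them. A compact sketch of just that part:

import signal
import subprocess

def run_forwarding_signals(cmd):
    # Forward SIGTERM/SIGINT to the child for its lifetime, then restore
    p = subprocess.Popen(cmd)
    prev = {s: signal.getsignal(s) for s in (signal.SIGTERM, signal.SIGINT)}
    for s in prev:
        signal.signal(s, lambda sig, frame: p.send_signal(sig))
    try:
        return p.wait()
    finally:
        for s, h in prev.items():
            signal.signal(s, h)
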
Example #42
def tag1234():
    try:
        force_error()
    except:
        return pkdexc()
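
This last fragment is essentially a probe of pkdexc itself: because pkdexc() returns the formatted traceback rather than printing it, the string can be stored, returned, or embedded in a reply. A self-contained equivalent:

from pykern.pkdebug import pkdexc

def traceback_string():
    try:
        raise RuntimeError('forced')
    except RuntimeError:
        # pkdexc() renders the active exception's traceback to a string
        return pkdexc()

print(traceback_string())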