Example #1
0
def _run(job_ini, concurrent_tasks, pdb, loglevel, hc, exports):
    global calc_path
    logging.basicConfig(level=getattr(logging, loglevel.upper()))
    job_inis = job_ini.split(',')
    assert len(job_inis) in (1, 2), job_inis
    monitor = performance.PerformanceMonitor(
        'total runtime', measuremem=True)

    if len(job_inis) == 1:  # run hazard or risk
        oqparam = readinput.get_oqparam(job_inis[0], hc_id=hc)
        if hc and hc < 0:  # interpret negative calculation ids
            calc_ids = datastore.get_calc_ids()
            try:
                hc = calc_ids[hc]
            except IndexError:
                raise SystemExit('There are %d old calculations, cannot '
                                 'retrieve the %s' % (len(calc_ids), hc))
        calc = base.calculators(oqparam, monitor)
        with monitor:
            calc.run(concurrent_tasks=concurrent_tasks, pdb=pdb,
                     exports=exports, hazard_calculation_id=hc)
    else:  # run hazard + risk
        calc = run2(
            job_inis[0], job_inis[1], concurrent_tasks, pdb, exports, monitor)

    logging.info('Total time spent: %s s', monitor.duration)
    logging.info('Memory allocated: %s', general.humansize(monitor.mem))
    monitor.flush()
    print('See the output with hdfview %s' % calc.datastore.hdf5path)
    calc_path = calc.datastore.calc_dir  # used to deduce the .pstat filename
    return calc
Example #2
0
def _run(job_ini, concurrent_tasks, pdb, reuse_input, loglevel, exports,
         params):
    global calc_path
    if 'hazard_calculation_id' in params:
        hc_id = int(params['hazard_calculation_id'])
        if hc_id < 0:  # interpret negative calculation ids
            calc_ids = datastore.get_calc_ids()
            try:
                params['hazard_calculation_id'] = calc_ids[hc_id]
            except IndexError:
                raise SystemExit('There are %d old calculations, cannot '
                                 'retrieve the %s' % (len(calc_ids), hc_id))
        else:
            params['hazard_calculation_id'] = hc_id
    dic = readinput.get_params(job_ini, params)
    # set the logs first of all
    log = logs.init("job", dic, getattr(logging, loglevel.upper()))

    # disable gzip_input
    base.BaseCalculator.gzip_inputs = lambda self: None
    with log, performance.Monitor('total runtime', measuremem=True) as monitor:
        calc = base.calculators(log.get_oqparam(), log.calc_id)
        if reuse_input:  # enable caching
            calc.oqparam.cachedir = datastore.get_datadir()
        calc.run(concurrent_tasks=concurrent_tasks, pdb=pdb, exports=exports)

    logging.info('Total time spent: %s s', monitor.duration)
    logging.info('Memory allocated: %s', general.humansize(monitor.mem))
    print('See the output with silx view %s' % calc.datastore.filename)
    calc_path, _ = os.path.splitext(calc.datastore.filename)  # used below
    return calc
Example #3
0
def run(job_ini, concurrent_tasks=None,
        loglevel='info', hc=None, exports=''):
    """
    Run a calculation. Optionally, set the number of concurrent_tasks
    (0 to disable the parallelization).
    """
    logging.basicConfig(level=getattr(logging, loglevel.upper()))
    job_inis = job_ini.split(',')
    assert len(job_inis) in (1, 2), job_inis
    monitor = performance.Monitor('total', measuremem=True)

    if len(job_inis) == 1:  # run hazard or risk
        oqparam = readinput.get_oqparam(job_inis[0], hc_id=hc)
        if hc and hc < 0:  # interpret negative calculation ids
            calc_ids = datastore.get_calc_ids()
            try:
                hc = calc_ids[hc]
            except IndexError:
                raise SystemExit('There are %d old calculations, cannot '
                                 'retrieve the %s' % (len(calc_ids), hc))
        calc = base.calculators(oqparam, monitor)
        monitor.monitor_dir = calc.datastore.calc_dir
        with monitor:
            calc.run(concurrent_tasks=concurrent_tasks, exports=exports,
                     hazard_calculation_id=hc)
    else:  # run hazard + risk
        calc = run2(
            job_inis[0], job_inis[1], concurrent_tasks, exports, monitor)

    logging.info('Total time spent: %s s', monitor.duration)
    logging.info('Memory allocated: %s', general.humansize(monitor.mem))
    monitor.flush()
    print('See the output with hdfview %s/output.hdf5' %
          calc.datastore.calc_dir)
    return calc
Example #4
0
def show(calc_id, key=None, rlzs=None):
    """
    Show the content of a datastore.

    :param calc_id: numeric calculation ID; if 0, show all calculations
    :param key: key of the datastore
    :param rlzs: flag; if given, print out the realizations in order
    """
    if not calc_id:
        if not os.path.exists(datastore.DATADIR):
            return
        rows = []
        for calc_id in datastore.get_calc_ids(datastore.DATADIR):
            try:
                oq = OqParam.from_(datastore.DataStore(calc_id).attrs)
                cmode, descr = oq.calculation_mode, oq.description
            except:  # invalid datastore directory
                logging.warn('Removed invalid calculation %d', calc_id)
                shutil.rmtree(
                    os.path.join(datastore.DATADIR, 'calc_%s' % calc_id))
            else:
                rows.append((calc_id, cmode, descr))
        for row in sorted(rows, key=lambda row: row[0]):  # by calc_id
            print('#%d %s: %s' % row)
        return
    ds = datastore.DataStore(calc_id)
    if key:
        if key in datastore.view:
            print(datastore.view(key, ds))
            return
        obj = ds[key]
        if hasattr(obj, 'value'):  # an array
            print(write_csv(io.StringIO(), obj.value))
        else:
            print(obj)
        return
    # print all keys
    oq = OqParam.from_(ds.attrs)
    print(
        oq.calculation_mode, 'calculation (%r) saved in %s contains:' %
        (oq.description, ds.calc_dir))
    for key in ds:
        print(key, humansize(ds.getsize(key)))

    # this part is experimental and not tested on purpose
    if rlzs and 'curves_by_trt_gsim' in ds:
        min_value = 0.01  # used in rmsep
        curves_by_rlz, mean_curves = combined_curves(ds)
        dists = []
        for rlz in sorted(curves_by_rlz):
            curves = curves_by_rlz[rlz]
            dist = sum(
                rmsep(mean_curves[imt], curves[imt], min_value)
                for imt in mean_curves.dtype.fields)
            dists.append((dist, rlz))
        for dist, rlz in sorted(dists):
            print('rlz=%s, rmsep=%s' % (rlz, dist))
Example #5
0
def show(what, calc_id=-1):
    """
    Show the content of a datastore.

    :param what: key or view of the datastore
    :param calc_id: numeric calculation ID; if -1, show the last calculation
    """
    if what == 'all':  # show all
        if not os.path.exists(datastore.DATADIR):
            return
        rows = []
        for calc_id in datastore.get_calc_ids(datastore.DATADIR):
            try:
                ds = datastore.read(calc_id)
                oq = ds['oqparam']
                cmode, descr = oq.calculation_mode, oq.description
            except:
                # invalid datastore file, or missing calculation_mode
                # and description attributes, perhaps due to a manual kill
                f = os.path.join(datastore.DATADIR, 'calc_%s.hdf5' % calc_id)
                logging.warn('Unreadable datastore %s', f)
                continue
            else:
                rows.append((calc_id, cmode, descr.encode('utf-8')))
                ds.close()
        for row in sorted(rows, key=lambda row: row[0]):  # by calc_id
            print('#%d %s: %s' % row)
        return
    elif what == 'views':
        for name in sorted(datastore.view):
            print(name)
        return

    ds = datastore.read(calc_id)

    # this part is experimental
    if what == 'rlzs' and 'hcurves' in ds:
        min_value = 0.01  # used in rmsep
        curves_by_rlz, mean_curves = get_hcurves_and_means(ds)
        dists = []
        for rlz, curves in curves_by_rlz.items():
            dist = sum(
                rmsep(mean_curves[imt], curves[imt], min_value)
                for imt in mean_curves.dtype.fields)
            dists.append((dist, rlz))
        print('Realizations in order of distance from the mean curves')
        for dist, rlz in sorted(dists):
            print('%s: rmsep=%s' % (rlz, dist))
    elif what in datastore.view:
        print(datastore.view(what, ds))
    else:
        obj = ds[what]
        if hasattr(obj, 'value'):  # an array
            print(write_csv(io.StringIO(), obj.value))
        else:
            print(obj)
Example #6
0
def show(what, calc_id=-1):
    """
    Show the content of a datastore.

    :param what: key or view of the datastore
    :param calc_id: numeric calculation ID; if -1, show the last calculation
    """
    if what == 'all':  # show all
        if not os.path.exists(datastore.DATADIR):
            return
        rows = []
        for calc_id in datastore.get_calc_ids(datastore.DATADIR):
            try:
                ds = datastore.read(calc_id)
                oq = ds['oqparam']
                cmode, descr = oq.calculation_mode, oq.description
            except:
                # invalid datastore file, or missing calculation_mode
                # and description attributes, perhaps due to a manual kill
                f = os.path.join(datastore.DATADIR, 'calc_%s.hdf5' % calc_id)
                logging.warn('Unreadable datastore %s', f)
                continue
            else:
                rows.append((calc_id, cmode, descr.encode('utf-8')))
                ds.close()
        for row in sorted(rows, key=lambda row: row[0]):  # by calc_id
            print('#%d %s: %s' % row)
        return
    elif what == 'views':
        for name in sorted(datastore.view):
            print(name)
        return

    ds = datastore.read(calc_id)

    # this part is experimental
    if what == 'rlzs' and 'hcurves' in ds:
        min_value = 0.01  # used in rmsep
        curves_by_rlz, mean_curves = get_hcurves_and_means(ds)
        dists = []
        for rlz, curves in curves_by_rlz.items():
            dist = sum(rmsep(mean_curves[imt], curves[imt], min_value)
                       for imt in mean_curves.dtype.fields)
            dists.append((dist, rlz))
        print('Realizations in order of distance from the mean curves')
        for dist, rlz in sorted(dists):
            print('%s: rmsep=%s' % (rlz, dist))
    elif what in datastore.view:
        print(datastore.view(what, ds))
    else:
        obj = ds[what]
        if hasattr(obj, 'value'):  # an array
            print(write_csv(io.StringIO(), obj.value))
        else:
            print(obj)
Example #7
0
def show(calc_id, key=None, rlzs=None):
    """
    Show the content of a datastore.

    :param calc_id: numeric calculation ID; if 0, show all calculations
    :param key: key of the datastore
    :param rlzs: flag; if given, print out the realizations in order
    """
    if not calc_id:
        if not os.path.exists(datastore.DATADIR):
            return
        rows = []
        for calc_id in datastore.get_calc_ids(datastore.DATADIR):
            try:
                oq = OqParam.from_(datastore.DataStore(calc_id).attrs)
                cmode, descr = oq.calculation_mode, oq.description
            except:  # invalid datastore directory
                logging.warn('Removed invalid calculation %d', calc_id)
                shutil.rmtree(os.path.join(
                    datastore.DATADIR, 'calc_%s' % calc_id))
            else:
                rows.append((calc_id, cmode, descr))
        for row in sorted(rows, key=lambda row: row[0]):  # by calc_id
            print('#%d %s: %s' % row)
        return
    ds = datastore.DataStore(calc_id)
    if key:
        if key in datastore.view:
            print(datastore.view(key, ds))
            return
        obj = ds[key]
        if hasattr(obj, 'value'):  # an array
            print(write_csv(io.StringIO(), obj.value))
        else:
            print(obj)
        return
    # print all keys
    oq = OqParam.from_(ds.attrs)
    print(oq.calculation_mode, 'calculation (%r) saved in %s contains:' %
          (oq.description, ds.hdf5path))
    for key in ds:
        print(key, humansize(ds.getsize(key)))

    # this part is experimental and not tested on purpose
    if rlzs and 'curves_by_trt_gsim' in ds:
        min_value = 0.01  # used in rmsep
        curves_by_rlz, mean_curves = combined_curves(ds)
        dists = []
        for rlz in sorted(curves_by_rlz):
            curves = curves_by_rlz[rlz]
            dist = sum(rmsep(mean_curves[imt], curves[imt], min_value)
                       for imt in mean_curves.dtype.fields)
            dists.append((dist, rlz))
        for dist, rlz in sorted(dists):
            print('rlz=%s, rmsep=%s' % (rlz, dist))
Example #8
0
def purge(calc_id):
    """
    Remove the given calculation. If you want to remove all calculations,
    use oq reset.
    """
    if calc_id < 0:
        try:
            calc_id = datastore.get_calc_ids()[calc_id]
        except IndexError:
            print('Calculation %d not found' % calc_id)
            return
    purge_one(calc_id, getpass.getuser())
Example #9
0
def main(what='contents', calc_id: str_or_int = -1, extra=()):
    """
    Show the content of a datastore (by default the last one).
    """
    datadir = datastore.get_datadir()
    if what == 'all':  # show all
        if not os.path.exists(datadir):
            return
        rows = []
        for calc_id in datastore.get_calc_ids(datadir):
            try:
                ds = datastore.read(calc_id)
                oq = ds['oqparam']
                cmode, descr = oq.calculation_mode, oq.description
            except Exception:
                # invalid datastore file, or missing calculation_mode
                # and description attributes, perhaps due to a manual kill
                f = os.path.join(datadir, 'calc_%s.hdf5' % calc_id)
                logging.warning('Unreadable datastore %s', f)
                continue
            else:
                rows.append((calc_id, cmode, descr.encode('utf-8')))
        for row in sorted(rows, key=lambda row: row[0]):  # by calc_id
            print('#%d %s: %s' % row)
        return

    ds = datastore.read(calc_id)

    # this part is experimental
    if view.keyfunc(what) in view:
        print(view(what, ds))
    elif what.split('/', 1)[0] in extract:
        obj = extract(ds, what, *extra)
        if isinstance(obj, hdf5.ArrayWrapper):
            print_(obj)
        elif hasattr(obj, 'dtype') and obj.dtype.names:
            print(writers.write_csv(io.StringIO(), obj))
        else:
            print(obj)
    elif what in ds:
        obj = ds.getitem(what)
        if '__pdcolumns__' in obj.attrs:
            df = ds.read_df(what)
            print(df.sort_values(df.columns[0]))
        elif hasattr(obj, 'items'):  # is a group of datasets
            print(obj)
        else:  # is a single dataset
            obj.refresh()  # for SWMR mode
            print_(hdf5.ArrayWrapper.from_(obj))
    else:
        print('%s not found' % what)

    ds.close()
Example #10
0
def get_calc_id(job_id=None):
    """
    Return the latest calc_id by looking both at the datastore
    and the database.
    """
    calcs = datastore.get_calc_ids(datastore.DATADIR)
    calc_id = 0 if not calcs else calcs[-1]
    if job_id is None:
        try:
            job_id = models.OqJob.objects.latest('id').id
        except exceptions.ObjectDoesNotExist:
            job_id = 0
    return max(calc_id, job_id)
Example #11
0
def get_calc_id(datadir, job_id=None):
    """
    Return the latest calc_id by looking both at the datastore
    and the database.
    """
    calcs = datastore.get_calc_ids(datadir)
    calc_id = 0 if not calcs else calcs[-1]
    if job_id is None:
        try:
            job_id = models.OqJob.objects.latest('id').id
        except exceptions.ObjectDoesNotExist:
            job_id = 0
    return max(calc_id, job_id)
Example #12
0
def purge(calc_id):
    """
    Remove the given calculation. If calc_id is 0, remove all calculations.
    """
    user = getpass.getuser()
    if not calc_id:
        for fname in os.listdir(datastore.DATADIR):
            mo = re.match('calc_(\d+)\.hdf5', fname)
            if mo is not None:
                calc_id = int(mo.group(1))
                purge_one(calc_id, user)
    else:
        if calc_id < 0:
            calc_id = datastore.get_calc_ids()[calc_id]
        purge_one(calc_id, user)
Example #13
0
def get_calc_id(db, datadir, job_id=None):
    """
    Return the latest calc_id by looking both at the datastore
    and the database.

    :param db: a :class:`openquake.server.dbapi.Db` instance
    :param datadir: the directory containing the datastores
    :param job_id: a job ID; if None, returns the latest job ID
    """
    calcs = datastore.get_calc_ids(datadir)
    calc_id = 0 if not calcs else calcs[-1]
    if job_id is None:
        try:
            job_id = db('SELECT seq FROM sqlite_sequence WHERE name="job"',
                        scalar=True)
        except NotFound:
            job_id = 0
    return max(calc_id, job_id)
Example #14
0
def get_calc_id(db, datadir, job_id=None):
    """
    Return the latest calc_id by looking both at the datastore
    and the database.

    :param db: a :class:`openquake.server.dbapi.Db` instance
    :param datadir: the directory containing the datastores
    :param job_id: a job ID; if None, returns the latest job ID
    """
    calcs = datastore.get_calc_ids(datadir)
    calc_id = 0 if not calcs else calcs[-1]
    if job_id is None:
        try:
            job_id = db('SELECT seq FROM sqlite_sequence WHERE name="job"',
                        scalar=True)
        except NotFound:
            job_id = 0
    return max(calc_id, job_id)
Example #15
0
def _run(job_inis, concurrent_tasks, calc_id, pdb, reuse_input, loglevel,
         exports, params):
    global calc_path
    assert len(job_inis) in (1, 2), job_inis
    # set the logs first of all
    calc_id = logs.init(calc_id, getattr(logging, loglevel.upper()))
    # disable gzip_input
    base.BaseCalculator.gzip_inputs = lambda self: None
    with performance.Monitor('total runtime', measuremem=True) as monitor:
        if os.environ.get('OQ_DISTRIBUTE') not in ('no', 'processpool'):
            os.environ['OQ_DISTRIBUTE'] = 'processpool'
        if len(job_inis) == 1:  # run hazard or risk
            if 'hazard_calculation_id' in params:
                hc_id = int(params['hazard_calculation_id'])
            else:
                hc_id = None
            if hc_id and hc_id < 0:  # interpret negative calculation ids
                calc_ids = datastore.get_calc_ids()
                try:
                    params['hazard_calculation_id'] = str(calc_ids[hc_id])
                except IndexError:
                    raise SystemExit(
                        'There are %d old calculations, cannot '
                        'retrieve the %s' % (len(calc_ids), hc_id))
            oqparam = readinput.get_oqparam(job_inis[0], kw=params)
            calc = base.calculators(oqparam, calc_id)
            if reuse_input:  # enable caching
                oqparam.cachedir = datastore.get_datadir()
            calc.run(concurrent_tasks=concurrent_tasks, pdb=pdb,
                     exports=exports)
        else:  # run hazard + risk
            calc = run2(
                job_inis[0], job_inis[1], calc_id, concurrent_tasks, pdb,
                reuse_input, loglevel, exports, params)

    logging.info('Total time spent: %s s', monitor.duration)
    logging.info('Memory allocated: %s', general.humansize(monitor.mem))
    print('See the output with silx view %s' % calc.datastore.filename)
    calc_path, _ = os.path.splitext(calc.datastore.filename)  # used below
    return calc
Example #16
0
def _run(job_ini, concurrent_tasks, pdb, loglevel, hc, exports, params):
    global calc_path
    logging.basicConfig(level=getattr(logging, loglevel.upper()))
    job_inis = job_ini.split(',')
    assert len(job_inis) in (1, 2), job_inis
    monitor = performance.Monitor(
        'total runtime', measuremem=True)
    if len(job_inis) == 1:  # run hazard or risk
        if hc:
            hc_id = hc[0]
            rlz_ids = hc[1:]
        else:
            hc_id = None
            rlz_ids = ()
        oqparam = readinput.get_oqparam(job_inis[0], hc_id=hc_id)
        if hc_id and hc_id < 0:  # interpret negative calculation ids
            calc_ids = datastore.get_calc_ids()
            try:
                hc_id = calc_ids[hc_id]
            except IndexError:
                raise SystemExit(
                    'There are %d old calculations, cannot '
                    'retrieve the %s' % (len(calc_ids), hc_id))
        calc = base.calculators(oqparam, monitor)
        with calc.monitor:
            calc.run(concurrent_tasks=concurrent_tasks, pdb=pdb,
                     exports=exports, hazard_calculation_id=hc_id,
                     rlz_ids=rlz_ids, **params)
    else:  # run hazard + risk
        calc = run2(
            job_inis[0], job_inis[1], concurrent_tasks, pdb,
            exports, params, monitor)

    logging.info('Total time spent: %s s', monitor.duration)
    logging.info('Memory allocated: %s', general.humansize(monitor.mem))
    monitor.flush()
    print('See the output with hdfview %s' % calc.datastore.hdf5path)
    calc_path = calc.datastore.calc_dir  # used for the .pstat filename
    return calc
Example #17
0
def show(calc_id, key=None, rlzs=None):
    """
    Show the content of a datastore.

    :param calc_id: numeric calculation ID; if 0, show all calculations
    :param key: key of the datastore
    :param rlzs: flag; if given, print out the realizations in order
    """
    if calc_id == 0:  # show all
        if not os.path.exists(datastore.DATADIR):
            return
        rows = []
        for calc_id in datastore.get_calc_ids(datastore.DATADIR):
            try:
                ds = datastore.DataStore(calc_id, mode='r')
                oq = OqParam.from_(ds.attrs)
                cmode, descr = oq.calculation_mode, oq.description
            except:
                # invalid datastore file, or missing calculation_mode
                # and description attributes, perhaps due to a manual kill
                logging.warn('Removed invalid calculation %d', calc_id)
                os.remove(
                    os.path.join(datastore.DATADIR, 'calc_%s.hdf5' % calc_id))
                continue
            else:
                rows.append((calc_id, cmode, descr))
                ds.close()
        for row in sorted(rows, key=lambda row: row[0]):  # by calc_id
            print('#%d %s: %s' % row)
        return
    ds = datastore.DataStore(calc_id, mode='r')
    if key:
        if key in datastore.view:
            print(datastore.view(key, ds))
            return
        obj = ds[key]
        if hasattr(obj, 'value'):  # an array
            print(write_csv(io.StringIO(), obj.value))
        else:
            print(obj)
        return

    oq = OqParam.from_(ds.attrs)

    # this part is experimental
    if rlzs and 'hcurves' in ds:
        min_value = 0.01  # used in rmsep
        curves_by_rlz, mean_curves = get_hcurves_and_means(ds)
        dists = []
        for rlz, curves in curves_by_rlz.items():
            dist = sum(
                rmsep(mean_curves[imt], curves[imt], min_value)
                for imt in mean_curves.dtype.fields)
            dists.append((dist, rlz))
        print('Realizations in order of distance from the mean curves')
        for dist, rlz in sorted(dists):
            print('%s: rmsep=%s' % (rlz, dist))
    else:
        # print all keys
        print(
            oq.calculation_mode, 'calculation (%r) saved in %s contains:' %
            (oq.description, ds.hdf5path))
        for key in ds:
            print(key, humansize(ds.getsize(key)))