def _run(job_ini, concurrent_tasks, pdb, loglevel, hc, exports):
    global calc_path
    logging.basicConfig(level=getattr(logging, loglevel.upper()))
    job_inis = job_ini.split(',')
    assert len(job_inis) in (1, 2), job_inis
    monitor = performance.PerformanceMonitor(
        'total runtime', measuremem=True)
    if len(job_inis) == 1:  # run hazard or risk
        oqparam = readinput.get_oqparam(job_inis[0], hc_id=hc)
        if hc and hc < 0:  # interpret negative calculation ids
            calc_ids = datastore.get_calc_ids()
            try:
                hc = calc_ids[hc]
            except IndexError:
                raise SystemExit('There are %d old calculations, cannot '
                                 'retrieve the %s' % (len(calc_ids), hc))
        calc = base.calculators(oqparam, monitor)
        with monitor:
            calc.run(concurrent_tasks=concurrent_tasks, pdb=pdb,
                     exports=exports, hazard_calculation_id=hc)
    else:  # run hazard + risk
        calc = run2(
            job_inis[0], job_inis[1], concurrent_tasks, pdb, exports, monitor)
    logging.info('Total time spent: %s s', monitor.duration)
    logging.info('Memory allocated: %s', general.humansize(monitor.mem))
    monitor.flush()
    print('See the output with hdfview %s' % calc.datastore.hdf5path)
    calc_path = calc.datastore.calc_dir  # used to deduce the .pstat filename
    return calc


def _run(job_ini, concurrent_tasks, pdb, reuse_input, loglevel, exports,
         params):
    global calc_path
    if 'hazard_calculation_id' in params:
        hc_id = int(params['hazard_calculation_id'])
        if hc_id < 0:  # interpret negative calculation ids
            calc_ids = datastore.get_calc_ids()
            try:
                params['hazard_calculation_id'] = calc_ids[hc_id]
            except IndexError:
                raise SystemExit('There are %d old calculations, cannot '
                                 'retrieve the %s' % (len(calc_ids), hc_id))
        else:
            params['hazard_calculation_id'] = hc_id
    dic = readinput.get_params(job_ini, params)
    # set the logs first of all
    log = logs.init("job", dic, getattr(logging, loglevel.upper()))
    # disable gzip_input
    base.BaseCalculator.gzip_inputs = lambda self: None
    with log, performance.Monitor('total runtime', measuremem=True) as monitor:
        calc = base.calculators(log.get_oqparam(), log.calc_id)
        if reuse_input:  # enable caching
            calc.oqparam.cachedir = datastore.get_datadir()
        calc.run(concurrent_tasks=concurrent_tasks, pdb=pdb, exports=exports)
    logging.info('Total time spent: %s s', monitor.duration)
    logging.info('Memory allocated: %s', general.humansize(monitor.mem))
    print('See the output with silx view %s' % calc.datastore.filename)
    calc_path, _ = os.path.splitext(calc.datastore.filename)  # used below
    return calc


def run(job_ini, concurrent_tasks=None, loglevel='info', hc=None, exports=''):
    """
    Run a calculation. Optionally, set the number of concurrent_tasks
    (0 to disable the parallelization).
    """
    logging.basicConfig(level=getattr(logging, loglevel.upper()))
    job_inis = job_ini.split(',')
    assert len(job_inis) in (1, 2), job_inis
    monitor = performance.Monitor('total', measuremem=True)
    if len(job_inis) == 1:  # run hazard or risk
        oqparam = readinput.get_oqparam(job_inis[0], hc_id=hc)
        if hc and hc < 0:  # interpret negative calculation ids
            calc_ids = datastore.get_calc_ids()
            try:
                hc = calc_ids[hc]
            except IndexError:
                raise SystemExit('There are %d old calculations, cannot '
                                 'retrieve the %s' % (len(calc_ids), hc))
        calc = base.calculators(oqparam, monitor)
        monitor.monitor_dir = calc.datastore.calc_dir
        with monitor:
            calc.run(concurrent_tasks=concurrent_tasks, exports=exports,
                     hazard_calculation_id=hc)
    else:  # run hazard + risk
        calc = run2(
            job_inis[0], job_inis[1], concurrent_tasks, exports, monitor)
    logging.info('Total time spent: %s s', monitor.duration)
    logging.info('Memory allocated: %s', general.humansize(monitor.mem))
    monitor.flush()
    print('See the output with hdfview %s/output.hdf5' %
          calc.datastore.calc_dir)
    return calc


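# A minimal sketch of the negative-id convention shared by `run` and `_run`
# above (the id list here is made up): a single .ini runs hazard or risk, a
# comma-separated pair chains hazard + risk, and a negative hc counts back
# through the sorted calculation ids exactly like Python's negative list
# indexing, so hc=-1 means "reuse the latest calculation".
def _resolve_hc_sketch(hc, calc_ids=(41, 42, 57)):
    if hc and hc < 0:
        try:
            return calc_ids[hc]  # e.g. -1 -> 57, the most recent one
        except IndexError:
            raise SystemExit('There are %d old calculations, cannot '
                             'retrieve the %s' % (len(calc_ids), hc))
    return hc

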
def show(calc_id, key=None, rlzs=None):
    """
    Show the content of a datastore.

    :param calc_id: numeric calculation ID; if 0, show all calculations
    :param key: key of the datastore
    :param rlzs: flag; if given, print out the realizations in order
    """
    if not calc_id:
        if not os.path.exists(datastore.DATADIR):
            return
        rows = []
        for calc_id in datastore.get_calc_ids(datastore.DATADIR):
            try:
                oq = OqParam.from_(datastore.DataStore(calc_id).attrs)
                cmode, descr = oq.calculation_mode, oq.description
            except Exception:  # invalid datastore directory
                logging.warning('Removed invalid calculation %d', calc_id)
                shutil.rmtree(
                    os.path.join(datastore.DATADIR, 'calc_%s' % calc_id))
            else:
                rows.append((calc_id, cmode, descr))
        for row in sorted(rows, key=lambda row: row[0]):  # by calc_id
            print('#%d %s: %s' % row)
        return
    ds = datastore.DataStore(calc_id)
    if key:
        if key in datastore.view:
            print(datastore.view(key, ds))
            return
        obj = ds[key]
        if hasattr(obj, 'value'):  # an array
            print(write_csv(io.StringIO(), obj.value))
        else:
            print(obj)
        return
    # print all keys
    oq = OqParam.from_(ds.attrs)
    print(
        oq.calculation_mode, 'calculation (%r) saved in %s contains:' %
        (oq.description, ds.calc_dir))
    for key in ds:
        print(key, humansize(ds.getsize(key)))
    # this part is experimental and not tested on purpose
    if rlzs and 'curves_by_trt_gsim' in ds:
        min_value = 0.01  # used in rmsep
        curves_by_rlz, mean_curves = combined_curves(ds)
        dists = []
        for rlz in sorted(curves_by_rlz):
            curves = curves_by_rlz[rlz]
            dist = sum(
                rmsep(mean_curves[imt], curves[imt], min_value)
                for imt in mean_curves.dtype.fields)
            dists.append((dist, rlz))
        for dist, rlz in sorted(dists):
            print('rlz=%s, rmsep=%s' % (rlz, dist))


def show(what, calc_id=-1):
    """
    Show the content of a datastore.

    :param what: key or view of the datastore
    :param calc_id: numeric calculation ID; if -1, show the last calculation
    """
    if what == 'all':  # show all
        if not os.path.exists(datastore.DATADIR):
            return
        rows = []
        for calc_id in datastore.get_calc_ids(datastore.DATADIR):
            try:
                ds = datastore.read(calc_id)
                oq = ds['oqparam']
                cmode, descr = oq.calculation_mode, oq.description
            except Exception:
                # invalid datastore file, or missing calculation_mode
                # and description attributes, perhaps due to a manual kill
                f = os.path.join(datastore.DATADIR, 'calc_%s.hdf5' % calc_id)
                logging.warning('Unreadable datastore %s', f)
                continue
            else:
                rows.append((calc_id, cmode, descr.encode('utf-8')))
                ds.close()
        for row in sorted(rows, key=lambda row: row[0]):  # by calc_id
            print('#%d %s: %s' % row)
        return
    elif what == 'views':
        for name in sorted(datastore.view):
            print(name)
        return
    ds = datastore.read(calc_id)
    # this part is experimental
    if what == 'rlzs' and 'hcurves' in ds:
        min_value = 0.01  # used in rmsep
        curves_by_rlz, mean_curves = get_hcurves_and_means(ds)
        dists = []
        for rlz, curves in curves_by_rlz.items():
            dist = sum(
                rmsep(mean_curves[imt], curves[imt], min_value)
                for imt in mean_curves.dtype.fields)
            dists.append((dist, rlz))
        print('Realizations in order of distance from the mean curves')
        for dist, rlz in sorted(dists):
            print('%s: rmsep=%s' % (rlz, dist))
    elif what in datastore.view:
        print(datastore.view(what, ds))
    else:
        obj = ds[what]
        if hasattr(obj, 'value'):  # an array
            print(write_csv(io.StringIO(), obj.value))
        else:
            print(obj)


def show(calc_id, key=None, rlzs=None):
    """
    Show the content of a datastore.

    :param calc_id: numeric calculation ID; if 0, show all calculations
    :param key: key of the datastore
    :param rlzs: flag; if given, print out the realizations in order
    """
    if not calc_id:
        if not os.path.exists(datastore.DATADIR):
            return
        rows = []
        for calc_id in datastore.get_calc_ids(datastore.DATADIR):
            try:
                oq = OqParam.from_(datastore.DataStore(calc_id).attrs)
                cmode, descr = oq.calculation_mode, oq.description
            except Exception:  # invalid datastore directory
                logging.warning('Removed invalid calculation %d', calc_id)
                shutil.rmtree(os.path.join(
                    datastore.DATADIR, 'calc_%s' % calc_id))
            else:
                rows.append((calc_id, cmode, descr))
        for row in sorted(rows, key=lambda row: row[0]):  # by calc_id
            print('#%d %s: %s' % row)
        return
    ds = datastore.DataStore(calc_id)
    if key:
        if key in datastore.view:
            print(datastore.view(key, ds))
            return
        obj = ds[key]
        if hasattr(obj, 'value'):  # an array
            print(write_csv(io.StringIO(), obj.value))
        else:
            print(obj)
        return
    # print all keys
    oq = OqParam.from_(ds.attrs)
    print(oq.calculation_mode, 'calculation (%r) saved in %s contains:' %
          (oq.description, ds.hdf5path))
    for key in ds:
        print(key, humansize(ds.getsize(key)))
    # this part is experimental and not tested on purpose
    if rlzs and 'curves_by_trt_gsim' in ds:
        min_value = 0.01  # used in rmsep
        curves_by_rlz, mean_curves = combined_curves(ds)
        dists = []
        for rlz in sorted(curves_by_rlz):
            curves = curves_by_rlz[rlz]
            dist = sum(rmsep(mean_curves[imt], curves[imt], min_value)
                       for imt in mean_curves.dtype.fields)
            dists.append((dist, rlz))
        for dist, rlz in sorted(dists):
            print('rlz=%s, rmsep=%s' % (rlz, dist))


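# `rmsep`, `combined_curves` and `get_hcurves_and_means` are not defined in
# this excerpt. A plausible sketch of rmsep (an assumption, not the engine's
# actual implementation) is a root-mean-square error computed only where the
# reference curve exceeds min_value, so that negligible probabilities do not
# dominate the distance between a realization and the mean curves:
import numpy

def rmsep_sketch(ref, other, min_value):
    ok = ref > min_value  # compare only the significant values
    return numpy.sqrt(((ref[ok] - other[ok]) ** 2).mean())

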
def purge(calc_id):
    """
    Remove the given calculation. If you want to remove all calculations,
    use oq reset.
    """
    if calc_id < 0:
        try:
            calc_id = datastore.get_calc_ids()[calc_id]
        except IndexError:
            print('Calculation %d not found' % calc_id)
            return
    purge_one(calc_id, getpass.getuser())


def main(what='contents', calc_id: str_or_int = -1, extra=()):
    """
    Show the content of a datastore (by default the last one).
    """
    datadir = datastore.get_datadir()
    if what == 'all':  # show all
        if not os.path.exists(datadir):
            return
        rows = []
        for calc_id in datastore.get_calc_ids(datadir):
            try:
                ds = datastore.read(calc_id)
                oq = ds['oqparam']
                cmode, descr = oq.calculation_mode, oq.description
            except Exception:
                # invalid datastore file, or missing calculation_mode
                # and description attributes, perhaps due to a manual kill
                f = os.path.join(datadir, 'calc_%s.hdf5' % calc_id)
                logging.warning('Unreadable datastore %s', f)
                continue
            else:
                rows.append((calc_id, cmode, descr.encode('utf-8')))
        for row in sorted(rows, key=lambda row: row[0]):  # by calc_id
            print('#%d %s: %s' % row)
        return
    ds = datastore.read(calc_id)
    # this part is experimental
    if view.keyfunc(what) in view:
        print(view(what, ds))
    elif what.split('/', 1)[0] in extract:
        obj = extract(ds, what, *extra)
        if isinstance(obj, hdf5.ArrayWrapper):
            print_(obj)
        elif hasattr(obj, 'dtype') and obj.dtype.names:
            print(writers.write_csv(io.StringIO(), obj))
        else:
            print(obj)
    elif what in ds:
        obj = ds.getitem(what)
        if '__pdcolumns__' in obj.attrs:
            df = ds.read_df(what)
            print(df.sort_values(df.columns[0]))
        elif hasattr(obj, 'items'):  # is a group of datasets
            print(obj)
        else:  # is a single dataset
            obj.refresh()  # for SWMR mode
            print_(hdf5.ArrayWrapper.from_(obj))
    else:
        print('%s not found' % what)
    ds.close()


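# Usage sketch for `main` above; the keys are hypothetical and what actually
# exists depends on the calculation. The dispatch order is: views first,
# then extract keys (recognized by the '/'), then raw datastore keys.
def _show_examples_sketch():
    main('all')            # list every calculation found in the datadir
    main('performance')    # any name registered in `view`
    main('hcurves/mean')   # keys with a '/' are dispatched to `extract`
    main('oqparam')        # a plain dataset/group key in the datastore

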
def get_calc_id(job_id=None):
    """
    Return the latest calc_id by looking both at the datastore and the
    database.
    """
    calcs = datastore.get_calc_ids(datastore.DATADIR)
    calc_id = 0 if not calcs else calcs[-1]
    if job_id is None:
        try:
            job_id = models.OqJob.objects.latest('id').id
        except exceptions.ObjectDoesNotExist:
            job_id = 0
    return max(calc_id, job_id)


def get_calc_id(datadir, job_id=None):
    """
    Return the latest calc_id by looking both at the datastore and the
    database.
    """
    calcs = datastore.get_calc_ids(datadir)
    calc_id = 0 if not calcs else calcs[-1]
    if job_id is None:
        try:
            job_id = models.OqJob.objects.latest('id').id
        except exceptions.ObjectDoesNotExist:
            job_id = 0
    return max(calc_id, job_id)


def purge(calc_id):
    """
    Remove the given calculation. If calc_id is 0, remove all calculations.
    """
    user = getpass.getuser()
    if not calc_id:
        for fname in os.listdir(datastore.DATADIR):
            mo = re.match(r'calc_(\d+)\.hdf5', fname)
            if mo is not None:
                calc_id = int(mo.group(1))
                purge_one(calc_id, user)
    else:
        if calc_id < 0:
            calc_id = datastore.get_calc_ids()[calc_id]
        purge_one(calc_id, user)


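# Self-contained sketch of the filename scan used by purge(0): the raw-string
# regex pulls the numeric id out of names like 'calc_42.hdf5' and skips
# everything else in the directory.
import re

for _fname in ['calc_42.hdf5', 'calc_57.hdf5', 'notes.txt']:
    _mo = re.match(r'calc_(\d+)\.hdf5', _fname)
    if _mo is not None:
        print(int(_mo.group(1)))  # prints 42, then 57

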
def get_calc_id(db, datadir, job_id=None):
    """
    Return the latest calc_id by looking both at the datastore and the
    database.

    :param db: a :class:`openquake.server.dbapi.Db` instance
    :param datadir: the directory containing the datastores
    :param job_id: a job ID; if None, returns the latest job ID
    """
    calcs = datastore.get_calc_ids(datadir)
    calc_id = 0 if not calcs else calcs[-1]
    if job_id is None:
        try:
            job_id = db('SELECT seq FROM sqlite_sequence WHERE name="job"',
                        scalar=True)
        except NotFound:
            job_id = 0
    return max(calc_id, job_id)


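# Self-contained sketch of the scalar query in get_calc_id above: SQLite
# records the last AUTOINCREMENT value per table in its internal
# sqlite_sequence table, so the latest job id can be read back with a single
# SELECT. The 'job' table here is a stand-in for the engine's real schema.
import sqlite3

con = sqlite3.connect(':memory:')
con.execute('CREATE TABLE job (id INTEGER PRIMARY KEY AUTOINCREMENT)')
con.execute('INSERT INTO job DEFAULT VALUES')
[(seq,)] = con.execute('SELECT seq FROM sqlite_sequence WHERE name="job"')
print(seq)  # 1, the latest job id

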
def _run(job_inis, concurrent_tasks, calc_id, pdb, reuse_input, loglevel,
         exports, params):
    global calc_path
    assert len(job_inis) in (1, 2), job_inis
    # set the logs first of all
    calc_id = logs.init(calc_id, getattr(logging, loglevel.upper()))
    # disable gzip_input
    base.BaseCalculator.gzip_inputs = lambda self: None
    with performance.Monitor('total runtime', measuremem=True) as monitor:
        if os.environ.get('OQ_DISTRIBUTE') not in ('no', 'processpool'):
            os.environ['OQ_DISTRIBUTE'] = 'processpool'
        if len(job_inis) == 1:  # run hazard or risk
            if 'hazard_calculation_id' in params:
                hc_id = int(params['hazard_calculation_id'])
            else:
                hc_id = None
            if hc_id and hc_id < 0:  # interpret negative calculation ids
                calc_ids = datastore.get_calc_ids()
                try:
                    params['hazard_calculation_id'] = str(calc_ids[hc_id])
                except IndexError:
                    raise SystemExit(
                        'There are %d old calculations, cannot '
                        'retrieve the %s' % (len(calc_ids), hc_id))
            oqparam = readinput.get_oqparam(job_inis[0], kw=params)
            calc = base.calculators(oqparam, calc_id)
            if reuse_input:  # enable caching
                oqparam.cachedir = datastore.get_datadir()
            calc.run(concurrent_tasks=concurrent_tasks, pdb=pdb,
                     exports=exports)
        else:  # run hazard + risk
            calc = run2(
                job_inis[0], job_inis[1], calc_id, concurrent_tasks, pdb,
                reuse_input, loglevel, exports, params)
    logging.info('Total time spent: %s s', monitor.duration)
    logging.info('Memory allocated: %s', general.humansize(monitor.mem))
    print('See the output with silx view %s' % calc.datastore.filename)
    calc_path, _ = os.path.splitext(calc.datastore.filename)  # used below
    return calc


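# Sketch of the OQ_DISTRIBUTE handling in the `_run` above: the variable is
# forced to 'processpool' unless it is already 'no' or 'processpool', so
# setting OQ_DISTRIBUTE=no beforehand keeps the run serial (handy with pdb).
# Reproducing the check in isolation:
import os

os.environ['OQ_DISTRIBUTE'] = 'no'
if os.environ.get('OQ_DISTRIBUTE') not in ('no', 'processpool'):
    os.environ['OQ_DISTRIBUTE'] = 'processpool'
print(os.environ['OQ_DISTRIBUTE'])  # still 'no'

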
def _run(job_ini, concurrent_tasks, pdb, loglevel, hc, exports, params):
    global calc_path
    logging.basicConfig(level=getattr(logging, loglevel.upper()))
    job_inis = job_ini.split(',')
    assert len(job_inis) in (1, 2), job_inis
    monitor = performance.Monitor(
        'total runtime', measuremem=True)
    if len(job_inis) == 1:  # run hazard or risk
        if hc:
            hc_id = hc[0]
            rlz_ids = hc[1:]
        else:
            hc_id = None
            rlz_ids = ()
        oqparam = readinput.get_oqparam(job_inis[0], hc_id=hc_id)
        if hc_id and hc_id < 0:  # interpret negative calculation ids
            calc_ids = datastore.get_calc_ids()
            try:
                hc_id = calc_ids[hc_id]
            except IndexError:
                raise SystemExit(
                    'There are %d old calculations, cannot '
                    'retrieve the %s' % (len(calc_ids), hc_id))
        calc = base.calculators(oqparam, monitor)
        with calc.monitor:
            calc.run(concurrent_tasks=concurrent_tasks, pdb=pdb,
                     exports=exports, hazard_calculation_id=hc_id,
                     rlz_ids=rlz_ids, **params)
    else:  # run hazard + risk
        calc = run2(
            job_inis[0], job_inis[1], concurrent_tasks, pdb, exports,
            params, monitor)
    logging.info('Total time spent: %s s', monitor.duration)
    logging.info('Memory allocated: %s', general.humansize(monitor.mem))
    monitor.flush()
    print('See the output with hdfview %s' % calc.datastore.hdf5path)
    calc_path = calc.datastore.calc_dir  # used for the .pstat filename
    return calc


def show(calc_id, key=None, rlzs=None):
    """
    Show the content of a datastore.

    :param calc_id: numeric calculation ID; if 0, show all calculations
    :param key: key of the datastore
    :param rlzs: flag; if given, print out the realizations in order
    """
    if calc_id == 0:  # show all
        if not os.path.exists(datastore.DATADIR):
            return
        rows = []
        for calc_id in datastore.get_calc_ids(datastore.DATADIR):
            try:
                ds = datastore.DataStore(calc_id, mode='r')
                oq = OqParam.from_(ds.attrs)
                cmode, descr = oq.calculation_mode, oq.description
            except Exception:
                # invalid datastore file, or missing calculation_mode
                # and description attributes, perhaps due to a manual kill
                logging.warning('Removed invalid calculation %d', calc_id)
                os.remove(
                    os.path.join(datastore.DATADIR, 'calc_%s.hdf5' % calc_id))
                continue
            else:
                rows.append((calc_id, cmode, descr))
                ds.close()
        for row in sorted(rows, key=lambda row: row[0]):  # by calc_id
            print('#%d %s: %s' % row)
        return
    ds = datastore.DataStore(calc_id, mode='r')
    if key:
        if key in datastore.view:
            print(datastore.view(key, ds))
            return
        obj = ds[key]
        if hasattr(obj, 'value'):  # an array
            print(write_csv(io.StringIO(), obj.value))
        else:
            print(obj)
        return
    oq = OqParam.from_(ds.attrs)
    # this part is experimental
    if rlzs and 'hcurves' in ds:
        min_value = 0.01  # used in rmsep
        curves_by_rlz, mean_curves = get_hcurves_and_means(ds)
        dists = []
        for rlz, curves in curves_by_rlz.items():
            dist = sum(
                rmsep(mean_curves[imt], curves[imt], min_value)
                for imt in mean_curves.dtype.fields)
            dists.append((dist, rlz))
        print('Realizations in order of distance from the mean curves')
        for dist, rlz in sorted(dists):
            print('%s: rmsep=%s' % (rlz, dist))
    else:  # print all keys
        print(
            oq.calculation_mode, 'calculation (%r) saved in %s contains:' %
            (oq.description, ds.hdf5path))
        for key in ds:
            print(key, humansize(ds.getsize(key)))