def process_inputs(config):
    """Read the job and data files named in *config* and populate *config*
    in place with all optimisation settings from the job header (model,
    variables, params, solver options, distance function).

    Returns the updated config dict.
    Raises Exception if a required job-file field is missing, or if no time
    field can be found or synthesised for the data file.
    """
    job = inputs.readFile(config['jobfile'])
    data = inputs.readFile(config['datafile'])

    # there's scope for something silly to go wrong here, but for now
    # let's just assume it won't...
    timedata = data['timeseries']

    # check for required inputs -- typically param and/or input may also be needed,
    # but it is conceivable that one might want to run a job without them
    for field in ['model', 'var']:
        if field not in job['header']:
            raise Exception("field '%s' must be specified in the job file" % field)

    model = job['header']['model'][0][0]
    var_lines = job['header']['var']
    params = job['header'].get('param', [])

    # params specified in external files are added after, and skipped by
    # process_vars if already specified by a param line
    param_file = job['header'].get('param_file', [])
    for item in param_file:
        for filename in item:
            with open(filename) as f:
                for line in f:
                    # accept both comma- and whitespace-separated files,
                    # matching the other front ends' job loaders
                    if ',' in line:
                        pp = [x.strip() for x in line.split(',')]
                    else:
                        pp = line.split()
                    # keep only lines whose third field parses as a number;
                    # short or malformed lines are silently skipped
                    # (IndexError previously escaped and crashed the load)
                    try:
                        float(pp[2])
                        params.append(pp)
                    except (ValueError, IndexError):
                        pass

    ins = job['header'].get('input', [])

    # list of chosen params, or '*' for all known (but really, don't do that!)
    param_select = job['header'].get('param_select', [[PARAM_SELECT]])
    param_select = [x for line in param_select for x in line]
    if PARAM_SELECT in param_select:
        param_select = PARAM_SELECT

    aliaslist = job['header'].get('alias', [])
    aliases = {}
    for alias in aliaslist:
        aliases[alias[0]] = alias[1]

    # name of the time field in the data file (LOCAL_TIME unless aliased)
    tname = aliases.get('t', inputs.LOCAL_TIME)

    if config['work']:
        workdir = os.path.join(config['build'], config['work'])
    else:
        # no explicit work dir: use a timestamped dir under the build dir
        timestamp = datetime.datetime.fromtimestamp(
            time.time()).strftime('%Y_%m_%d_%H_%M_%S')
        workdir = os.path.join(config['build'], model, timestamp)

    config['name'] = model
    config['program'] = job['header'].get(
        'program', [[os.path.join(BUILD, model + '.model')]])[0][0]
    config['model_io'] = job['header'].get(
        'model_io', [[os.path.join(workdir, 'model_io')]])[0][0]
    config['work'] = workdir
    config['info'] = os.path.join(workdir, config['info'])

    if 'timestep' in job['header']:
        config['timestep'] = float(job['header']['timestep'][0][0])

    if tname not in timedata:
        # NOTE(review): assumes config always carries a 'timestep' default --
        # confirm against the caller's CONFIG initialisation
        if config['timestep']:
            # no time column in the data: synthesise one at fixed steps
            # (next(iter(...)) rather than .keys()[0] so this also works
            # under Python 3, where dict.keys() is not indexable)
            stepcount = len(timedata[next(iter(timedata))])
            timedata[tname] = np.array(range(stepcount)) * config['timestep']
        else:
            raise Exception("time step field '%s' not present in data file" % tname)

    config['times'] = timedata[tname]
    config['vars'], varnames = process_vars(var_lines, aliases, timedata)
    config['params'], _ = process_vars(params, aliases, timedata)
    config['param_unselect'] = []

    # if an explicit param_select was given, partition params into the
    # selected and unselected lists
    if param_select != PARAM_SELECT:
        all_params = config['params']
        config['params'] = []
        for param in all_params:
            if param['name'] in param_select:
                config['params'].append(param)
            else:
                config['param_unselect'].append(param)

    config['inputs'], _ = process_vars(ins, aliases, timedata)
    config['baseSeq'], _ = steps.readFiles(job['header'].get('init', [[]])[0])
    config['job_mode'] = job['header'].get('job_mode', [[JOB_MODE]])[0][0]
    config['solver'] = job['header'].get('solver', [[SOLVER]])[0][0]
    config['steady'] = float(job['header'].get('steady', [[STEADY]])[0][0])
    config['max_iter'] = int(job['header'].get('max_iter', [[MAX_ITER]])[0][0])

    if 'sigma' in job['header']:
        config['sigma'] = float(job['header']['sigma'][0][0])

    weights = job['header'].get('weight', [])
    for weight in weights:
        config['weights'][weight[0]] = float(weight[1])

    # record any posthoc transformations for optimisation variables
    posts = job['header'].get('post', [])
    for post in posts:
        if post[0] in varnames:
            ff = posthoc.get(post[1:])
            if ff is not None:
                config['vars'][varnames.index(post[0])]['post'].append(ff)

    # for the moment the only supported distance functions are in the distance module
    # if that's ever not the case this could be a bit trickier...
    if config['sigma'] is not None and job['header'].get(
            'distance', [[DISTANCE]])[0][0] == 'loglik':
        config['distance'] = distance.loglikWithSigma(config['sigma'])
    else:
        config['distance'] = getattr(
            distance, job['header'].get('distance', [[DISTANCE]])[0][0])

    return config
def process_inputs(config):
    """Read the job and data files named in *config* and populate *config*
    in place with all simulation settings from the job header (model,
    variables, params, batch/path options, distance function).

    Returns the updated config dict.
    Raises Exception if a required job-file field is missing, the model
    executable does not exist, or the time field is absent from the data.
    """
    # parenthesised so this also runs under Python 3 (same output on Python 2)
    print('Processing inputs')
    job = inputs.readFile(config['jobfile'])
    data = inputs.readFile(config['datafile'])

    # there's scope for something silly to go wrong here, but for now
    # let's just assume it won't...
    timedata = data['timeseries']

    # check for required inputs -- typically param and/or input may also be needed,
    # but it is conceivable that one might want to run a job without them
    for field in ['model', 'var']:
        if field not in job['header']:
            raise Exception("field '%s' must be specified in the job file" % field)

    model = job['header']['model'][0][0]
    var_lines = job['header']['var']
    params = job['header'].get('param', [])

    # params specified in external files are added after, and skipped by
    # process_vars if already specified by a param line
    param_file = job['header'].get('param_file', [])
    for item in param_file:
        for filename in item:
            with open(filename) as f:
                for line in f:
                    # accept both comma- and whitespace-separated files
                    if ',' in line:
                        pp = [x.strip() for x in line.split(',')]
                    else:
                        pp = line.split()
                    # keep only lines whose third field parses as a number
                    try:
                        float(pp[2])
                        params.append(pp)
                    except (ValueError, IndexError):
                        pass

    ins = job['header'].get('input', [])

    # list of chosen params, or '*' for all known
    param_select = job['header'].get('param_select', [[PARAM_SELECT]])
    param_select = [x for line in param_select for x in line]
    if PARAM_SELECT in param_select:
        param_select = PARAM_SELECT

    aliaslist = job['header'].get('alias', [])
    aliases = {}
    for alias in aliaslist:
        aliases[alias[0]] = alias[1]

    # name of the time field in the data file (LOCAL_TIME unless aliased)
    tname = aliases.get('t', inputs.LOCAL_TIME)

    if config['work']:
        workdir = os.path.join(config['build'], config['work'])
    else:
        # no explicit work dir: use a timestamped dir under the build dir
        timestamp = datetime.datetime.fromtimestamp(
            time.time()).strftime('%Y_%m_%d_%H_%M_%S')
        workdir = os.path.join(config['build'], model, timestamp)

    config['name'] = model
    config['program'] = job['header'].get(
        'program', [[os.path.join(BUILD, model + '.model')]])[0][0]
    config['model_io'] = job['header'].get(
        'model_io', [[os.path.join(workdir, 'model_io')]])[0][0]
    config['work'] = workdir
    config['outfile'] = os.path.join(workdir, config['outfile'])
    config['sensitivities'] = os.path.join(workdir, config['sensitivities'])
    config['info'] = os.path.join(workdir, config['info'])

    if not os.path.isfile(config['program']):
        raise Exception("model executable '%s' does not exist" % config['program'])

    if tname not in timedata:
        raise Exception("time step field '%s' not present in data file" % tname)

    config['times'] = timedata[tname]
    config['vars'] = process_vars(var_lines, aliases, timedata)
    config['params'] = process_vars(params, aliases, timedata)

    # if an explicit param_select was given, keep only the chosen params
    if param_select != PARAM_SELECT:
        all_params = config['params']
        config['params'] = []
        for param in all_params:
            if param['name'] in param_select:
                config['params'].append(param)

    config['inputs'] = process_vars(ins, aliases, timedata)
    config['baseSeq'], _ = steps.readFiles(job['header'].get('init', [[]])[0])

    config['divisions'] = int(job['header'].get('divisions', [[DIVISIONS]])[0][0])
    config['nbatch'] = int(job['header'].get('nbatch', [[NBATCH]])[0][0])
    config['job_mode'] = job['header'].get('job_mode', [[JOB_MODE]])[0][0]
    config['npath'] = int(job['header'].get('npath', [[NPATH]])[0][0])
    config['jump'] = int(job['header'].get('jump', [[JUMP]])[0][0])
    config['interference'] = int(job['header'].get('interference', [[INTERFERENCE]])[0][0])
    config['save_interval'] = int(job['header'].get('save_interval', [[SAVE_INTERVAL]])[0][0])
    config['delta'] = float(job['header'].get('delta', [[DELTA]])[0][0])
    # an optional second token on the delta line marks it as relative
    if 'delta' in job['header'] and len(job['header']['delta'][0]) > 1:
        config['relative_delta'] = job['header']['delta'][0][1] == 'relative'
    config['timeout'] = int(job['header'].get('timeout', [[model_bcmd.TIMEOUT]])[0][0])

    # hack alert -- option for non-finite distances to be replaced with some real value
    config['substitute'] = float(job['header'].get(
        'substitute', [[distance.SUBSTITUTE]])[0][0])
    distance.SUBSTITUTE = config['substitute']

    if config['perturb']:
        config['beta'] = int(job['header'].get('beta', [[BETA]])[0][0])
    else:
        # ignore multiple trials in config if not perturbing
        config['beta'] = 1

    # weight sums over vars for hessian jobs, for optim compatibility
    # (default [] rather than {} for consistency with the other loaders;
    # iterating an empty container is identical either way)
    weights = job['header'].get('weight', [])
    for weight in weights:
        config['weights'][weight[0]] = float(weight[1])

    if 'sigma' in job['header']:
        config['sigma'] = float(job['header']['sigma'][0][0])

    # for the moment the only supported distance functions are in the distance module
    # if that's ever not the case this could be a bit trickier...
    if config['sigma'] is not None and job['header'].get(
            'distance', [[DISTANCE]])[0][0] == 'loglik':
        config['distance'] = distance.loglikWithSigma(config['sigma'])
        print('using sigma=%g' % config['sigma'])
    else:
        config['distance'] = getattr(
            distance, job['header'].get('distance', [[DISTANCE]])[0][0])

    return config
def process_inputs(config):
    """Read the job and data files named in *config* and populate *config*
    in place with all optimisation settings from the job header (model,
    variables, params, solver options, distance function).

    Returns the updated config dict.
    Raises Exception if a required job-file field is missing, or if no time
    field can be found or synthesised for the data file.
    """
    job = inputs.readFile(config['jobfile'])
    data = inputs.readFile(config['datafile'])

    # there's scope for something silly to go wrong here, but for now
    # let's just assume it won't...
    timedata = data['timeseries']

    # check for required inputs -- typically param and/or input may also be needed,
    # but it is conceivable that one might want to run a job without them
    for field in ['model', 'var']:
        if field not in job['header']:
            raise Exception("field '%s' must be specified in the job file" % field)

    model = job['header']['model'][0][0]
    var_lines = job['header']['var']
    params = job['header'].get('param', [])

    # params specified in external files are added after, and skipped by
    # process_vars if already specified by a param line
    param_file = job['header'].get('param_file', [])
    for item in param_file:
        for filename in item:
            with open(filename) as f:
                for line in f:
                    # accept both comma- and whitespace-separated files,
                    # matching the other front ends' job loaders
                    if ',' in line:
                        pp = [x.strip() for x in line.split(',')]
                    else:
                        pp = line.split()
                    # keep only lines whose third field parses as a number;
                    # short or malformed lines are silently skipped
                    # (IndexError previously escaped and crashed the load)
                    try:
                        float(pp[2])
                        params.append(pp)
                    except (ValueError, IndexError):
                        pass

    ins = job['header'].get('input', [])

    # list of chosen params, or '*' for all known (but really, don't do that!)
    param_select = job['header'].get('param_select', [[PARAM_SELECT]])
    param_select = [x for line in param_select for x in line]
    if PARAM_SELECT in param_select:
        param_select = PARAM_SELECT

    aliaslist = job['header'].get('alias', [])
    aliases = {}
    for alias in aliaslist:
        aliases[alias[0]] = alias[1]

    # name of the time field in the data file (LOCAL_TIME unless aliased)
    tname = aliases.get('t', inputs.LOCAL_TIME)

    if config['work']:
        workdir = os.path.join(config['build'], config['work'])
    else:
        # no explicit work dir: use a timestamped dir under the build dir
        timestamp = datetime.datetime.fromtimestamp(
            time.time()).strftime('%Y_%m_%d_%H_%M_%S')
        workdir = os.path.join(config['build'], model, timestamp)

    config['name'] = model
    config['program'] = job['header'].get(
        'program', [[os.path.join(BUILD, model + '.model')]])[0][0]
    config['model_io'] = job['header'].get(
        'model_io', [[os.path.join(workdir, 'model_io')]])[0][0]
    config['work'] = workdir
    config['info'] = os.path.join(workdir, config['info'])

    if 'timestep' in job['header']:
        config['timestep'] = float(job['header']['timestep'][0][0])

    if tname not in timedata:
        # NOTE(review): assumes config always carries a 'timestep' default --
        # confirm against the caller's CONFIG initialisation
        if config['timestep']:
            # no time column in the data: synthesise one at fixed steps
            # (next(iter(...)) rather than .keys()[0] so this also works
            # under Python 3, where dict.keys() is not indexable)
            stepcount = len(timedata[next(iter(timedata))])
            timedata[tname] = np.array(range(stepcount)) * config['timestep']
        else:
            raise Exception("time step field '%s' not present in data file" % tname)

    config['times'] = timedata[tname]
    config['vars'], varnames = process_vars(var_lines, aliases, timedata)
    config['params'], _ = process_vars(params, aliases, timedata)
    config['param_unselect'] = []

    # if an explicit param_select was given, partition params into the
    # selected and unselected lists
    if param_select != PARAM_SELECT:
        all_params = config['params']
        config['params'] = []
        for param in all_params:
            if param['name'] in param_select:
                config['params'].append(param)
            else:
                config['param_unselect'].append(param)

    config['inputs'], _ = process_vars(ins, aliases, timedata)
    config['baseSeq'], _ = steps.readFiles(job['header'].get('init', [[]])[0])
    config['job_mode'] = job['header'].get('job_mode', [[JOB_MODE]])[0][0]
    config['solver'] = job['header'].get('solver', [[SOLVER]])[0][0]
    config['steady'] = float(job['header'].get('steady', [[STEADY]])[0][0])
    config['max_iter'] = int(job['header'].get('max_iter', [[MAX_ITER]])[0][0])

    if 'sigma' in job['header']:
        config['sigma'] = float(job['header']['sigma'][0][0])

    weights = job['header'].get('weight', [])
    for weight in weights:
        config['weights'][weight[0]] = float(weight[1])

    # record any posthoc transformations for optimisation variables
    posts = job['header'].get('post', [])
    for post in posts:
        if post[0] in varnames:
            ff = posthoc.get(post[1:])
            if ff is not None:
                config['vars'][varnames.index(post[0])]['post'].append(ff)

    # for the moment the only supported distance functions are in the distance module
    # if that's ever not the case this could be a bit trickier...
    if config['sigma'] is not None and job['header'].get(
            'distance', [[DISTANCE]])[0][0] == 'loglik':
        config['distance'] = distance.loglikWithSigma(config['sigma'])
    else:
        config['distance'] = getattr(
            distance, job['header'].get('distance', [[DISTANCE]])[0][0])

    return config
def process_inputs(jobfile, datafile):
    """Read an ABC job file and data file and build a config dict of all
    run settings, starting from the module-level CONFIG defaults.

    Returns the populated config dict.
    Raises Exception if a required job-file field is missing, the model
    executable does not exist, or the time field is absent from the data.
    """
    # copy the defaults rather than aliasing the module-level dict, so
    # repeated calls don't leak settings from one job into the next
    config = dict(CONFIG)
    job = inputs.readFile(jobfile)
    data = inputs.readFile(datafile)

    timedata = data['timeseries']

    # check for required inputs
    for field in ['model', 'var', 'input']:
        if field not in job['header']:
            raise Exception("field '%s' must be specified in the job file" % field)

    model = job['header']['model'][0][0]
    var_lines = job['header']['var']
    params = job['header'].get('param', [])

    # params specified in external files are added after, and skipped by
    # process_vars if already specified by a param line
    param_file = job['header'].get('param_file', [])
    for item in param_file:
        for filename in item:
            with open(filename) as f:
                for line in f:
                    # accept both comma- and whitespace-separated files
                    if ',' in line:
                        pp = [x.strip() for x in line.split(',')]
                    else:
                        pp = line.split()
                    # keep only lines whose third field parses as a number
                    try:
                        float(pp[2])
                        params.append(pp)
                    except (ValueError, IndexError):
                        pass

    ins = job['header']['input']

    # list of chosen params, or '*' for all known
    param_select = job['header'].get('param_select', [[PARAM_SELECT]])
    param_select = [x for line in param_select for x in line]
    if PARAM_SELECT in param_select:
        param_select = PARAM_SELECT

    aliaslist = job['header'].get('alias', [])
    aliases = {}
    for alias in aliaslist:
        aliases[alias[0]] = alias[1]

    # name of the time field in the data file (LOCAL_TIME unless aliased)
    tname = aliases.get('t', inputs.LOCAL_TIME)

    if config['work']:
        workdir = os.path.join(config['build'], config['work'])
    else:
        # no explicit work dir: use a timestamped dir under the build dir
        timestamp = datetime.datetime.fromtimestamp(
            time.time()).strftime('%Y_%m_%d_%H_%M_%S')
        workdir = os.path.join(config['build'], model, timestamp)
    config['work'] = workdir

    # TODO: allow more levels of control over this stuff in the job file
    config['name'] = model
    config['program'] = job['header'].get(
        'program', [[os.path.join(BUILD, model + '.model')]])[0][0]
    config['model_io'] = job['header'].get(
        'model_io', [[os.path.join(workdir, 'model_io')]])[0][0]
    config['abc_io'] = job['header'].get(
        'abc_io', [[os.path.join(workdir, 'abc_io')]])[0][0]

    if not os.path.isfile(config['program']):
        raise Exception("model executable '%s' does not exist" % config['program'])

    if tname not in timedata:
        raise Exception("time step field '%s' not present in data file" % tname)

    config['times'] = timedata[tname]
    config['vars'] = process_vars(var_lines, aliases, timedata)
    config['params'] = process_vars(params, aliases, timedata)

    # if an explicit param_select was given, keep only the chosen params
    if param_select != PARAM_SELECT:
        all_params = config['params']
        config['params'] = []
        for param in all_params:
            if param['name'] in param_select:
                config['params'].append(param)

    config['inputs'] = process_vars(ins, aliases, timedata)
    config['baseSeq'], _ = steps.readFiles(job['header'].get('init', [[]])[0])

    config['particles'] = int(job['header'].get('particles', [[NPARTICLES]])[0][0])
    config['nbatch'] = int(job['header'].get('nbatch', [[NBATCH]])[0][0])
    config['modelKernel'] = float(job['header'].get('modelKernel', [[MODELKERNEL]])[0][0])
    config['finalepsilon'] = float(job['header'].get('finalepsilon', [[FINALEPSILON]])[0][0])
    config['alpha'] = float(job['header'].get('alpha', [[ALPHA]])[0][0])
    config['beta'] = int(job['header'].get('beta', [[BETA]])[0][0])
    config['timeout'] = int(job['header'].get('timeout', [[model_bcmd.TIMEOUT]])[0][0])

    # ABC-SYSBIO distance funcs have the same names as ordinary ones but with suffix "Distance"
    # TODO: handle loglik sigma factory
    config['distance'] = getattr(
        distance, job['header'].get('distance', [[DISTANCE]])[0][0] + "Distance")

    return config
def process_inputs(config):
    """Read the job and data files named in *config* and populate *config*
    in place with all simulation settings from the job header (model,
    variables, params, batch/path options, distance function).

    Returns the updated config dict.
    Raises Exception if a required job-file field is missing, the model
    executable does not exist, or the time field is absent from the data.
    """
    # parenthesised so this also runs under Python 3 (same output on Python 2)
    print('Processing inputs')
    job = inputs.readFile(config['jobfile'])
    data = inputs.readFile(config['datafile'])

    # there's scope for something silly to go wrong here, but for now
    # let's just assume it won't...
    timedata = data['timeseries']

    # check for required inputs -- typically param and/or input may also be needed,
    # but it is conceivable that one might want to run a job without them
    for field in ['model', 'var']:
        if field not in job['header']:
            raise Exception("field '%s' must be specified in the job file" % field)

    model = job['header']['model'][0][0]
    var_lines = job['header']['var']
    params = job['header'].get('param', [])

    # params specified in external files are added after, and skipped by
    # process_vars if already specified by a param line
    param_file = job['header'].get('param_file', [])
    for item in param_file:
        for filename in item:
            with open(filename) as f:
                for line in f:
                    # accept both comma- and whitespace-separated files
                    if ',' in line:
                        pp = [x.strip() for x in line.split(',')]
                    else:
                        pp = line.split()
                    # keep only lines whose third field parses as a number
                    try:
                        float(pp[2])
                        params.append(pp)
                    except (ValueError, IndexError):
                        pass

    ins = job['header'].get('input', [])

    # list of chosen params, or '*' for all known
    param_select = job['header'].get('param_select', [[PARAM_SELECT]])
    param_select = [x for line in param_select for x in line]
    if PARAM_SELECT in param_select:
        param_select = PARAM_SELECT

    aliaslist = job['header'].get('alias', [])
    aliases = {}
    for alias in aliaslist:
        aliases[alias[0]] = alias[1]

    # name of the time field in the data file (LOCAL_TIME unless aliased)
    tname = aliases.get('t', inputs.LOCAL_TIME)

    if config['work']:
        workdir = os.path.join(config['build'], config['work'])
    else:
        # no explicit work dir: use a timestamped dir under the build dir
        timestamp = datetime.datetime.fromtimestamp(
            time.time()).strftime('%Y_%m_%d_%H_%M_%S')
        workdir = os.path.join(config['build'], model, timestamp)

    config['name'] = model
    config['program'] = job['header'].get(
        'program', [[os.path.join(BUILD, model + '.model')]])[0][0]
    config['model_io'] = job['header'].get(
        'model_io', [[os.path.join(workdir, 'model_io')]])[0][0]
    config['work'] = workdir
    config['outfile'] = os.path.join(workdir, config['outfile'])
    config['sensitivities'] = os.path.join(workdir, config['sensitivities'])
    config['info'] = os.path.join(workdir, config['info'])

    if not os.path.isfile(config['program']):
        raise Exception("model executable '%s' does not exist" % config['program'])

    if tname not in timedata:
        raise Exception("time step field '%s' not present in data file" % tname)

    config['times'] = timedata[tname]
    config['vars'] = process_vars(var_lines, aliases, timedata)
    config['params'] = process_vars(params, aliases, timedata)

    # if an explicit param_select was given, keep only the chosen params
    if param_select != PARAM_SELECT:
        all_params = config['params']
        config['params'] = []
        for param in all_params:
            if param['name'] in param_select:
                config['params'].append(param)

    config['inputs'] = process_vars(ins, aliases, timedata)
    config['baseSeq'], _ = steps.readFiles(job['header'].get('init', [[]])[0])

    config['divisions'] = int(job['header'].get('divisions', [[DIVISIONS]])[0][0])
    config['nbatch'] = int(job['header'].get('nbatch', [[NBATCH]])[0][0])
    config['job_mode'] = job['header'].get('job_mode', [[JOB_MODE]])[0][0]
    config['npath'] = int(job['header'].get('npath', [[NPATH]])[0][0])
    config['jump'] = int(job['header'].get('jump', [[JUMP]])[0][0])
    config['interference'] = int(job['header'].get('interference', [[INTERFERENCE]])[0][0])
    config['save_interval'] = int(job['header'].get('save_interval', [[SAVE_INTERVAL]])[0][0])
    config['delta'] = float(job['header'].get('delta', [[DELTA]])[0][0])
    # an optional second token on the delta line marks it as relative
    if 'delta' in job['header'] and len(job['header']['delta'][0]) > 1:
        config['relative_delta'] = job['header']['delta'][0][1] == 'relative'
    config['timeout'] = int(job['header'].get('timeout', [[model_bcmd.TIMEOUT]])[0][0])

    # hack alert -- option for non-finite distances to be replaced with some real value
    config['substitute'] = float(job['header'].get(
        'substitute', [[distance.SUBSTITUTE]])[0][0])
    distance.SUBSTITUTE = config['substitute']

    if config['perturb']:
        config['beta'] = int(job['header'].get('beta', [[BETA]])[0][0])
    else:
        # ignore multiple trials in config if not perturbing
        config['beta'] = 1

    # weight sums over vars for hessian jobs, for optim compatibility
    # (default [] rather than {} for consistency with the other loaders;
    # iterating an empty container is identical either way)
    weights = job['header'].get('weight', [])
    for weight in weights:
        config['weights'][weight[0]] = float(weight[1])

    if 'sigma' in job['header']:
        config['sigma'] = float(job['header']['sigma'][0][0])

    # for the moment the only supported distance functions are in the distance module
    # if that's ever not the case this could be a bit trickier...
    if config['sigma'] is not None and job['header'].get(
            'distance', [[DISTANCE]])[0][0] == 'loglik':
        config['distance'] = distance.loglikWithSigma(config['sigma'])
        print('using sigma=%g' % config['sigma'])
    else:
        config['distance'] = getattr(
            distance, job['header'].get('distance', [[DISTANCE]])[0][0])

    return config
def process_inputs(jobfile, datafile):
    """Read an ABC job file and data file and build a config dict of all
    run settings, starting from the module-level CONFIG defaults.

    Returns the populated config dict.
    Raises Exception if a required job-file field is missing, the model
    executable does not exist, or the time field is absent from the data.
    """
    # copy the defaults rather than aliasing the module-level dict, so
    # repeated calls don't leak settings from one job into the next
    config = dict(CONFIG)
    job = inputs.readFile(jobfile)
    data = inputs.readFile(datafile)

    timedata = data['timeseries']

    # check for required inputs
    for field in ['model', 'var', 'input']:
        if field not in job['header']:
            raise Exception("field '%s' must be specified in the job file" % field)

    model = job['header']['model'][0][0]
    var_lines = job['header']['var']
    params = job['header'].get('param', [])

    # params specified in external files are added after, and skipped by
    # process_vars if already specified by a param line
    param_file = job['header'].get('param_file', [])
    for item in param_file:
        for filename in item:
            with open(filename) as f:
                for line in f:
                    # accept both comma- and whitespace-separated files
                    if ',' in line:
                        pp = [x.strip() for x in line.split(',')]
                    else:
                        pp = line.split()
                    # keep only lines whose third field parses as a number
                    try:
                        float(pp[2])
                        params.append(pp)
                    except (ValueError, IndexError):
                        pass

    ins = job['header']['input']

    # list of chosen params, or '*' for all known
    param_select = job['header'].get('param_select', [[PARAM_SELECT]])
    param_select = [x for line in param_select for x in line]
    if PARAM_SELECT in param_select:
        param_select = PARAM_SELECT

    aliaslist = job['header'].get('alias', [])
    aliases = {}
    for alias in aliaslist:
        aliases[alias[0]] = alias[1]

    # name of the time field in the data file (LOCAL_TIME unless aliased)
    tname = aliases.get('t', inputs.LOCAL_TIME)

    if config['work']:
        workdir = os.path.join(config['build'], config['work'])
    else:
        # no explicit work dir: use a timestamped dir under the build dir
        timestamp = datetime.datetime.fromtimestamp(
            time.time()).strftime('%Y_%m_%d_%H_%M_%S')
        workdir = os.path.join(config['build'], model, timestamp)
    config['work'] = workdir

    # TODO: allow more levels of control over this stuff in the job file
    config['name'] = model
    config['program'] = job['header'].get(
        'program', [[os.path.join(BUILD, model + '.model')]])[0][0]
    config['model_io'] = job['header'].get(
        'model_io', [[os.path.join(workdir, 'model_io')]])[0][0]
    config['abc_io'] = job['header'].get(
        'abc_io', [[os.path.join(workdir, 'abc_io')]])[0][0]

    if not os.path.isfile(config['program']):
        raise Exception("model executable '%s' does not exist" % config['program'])

    if tname not in timedata:
        raise Exception("time step field '%s' not present in data file" % tname)

    config['times'] = timedata[tname]
    config['vars'] = process_vars(var_lines, aliases, timedata)
    config['params'] = process_vars(params, aliases, timedata)

    # if an explicit param_select was given, keep only the chosen params
    if param_select != PARAM_SELECT:
        all_params = config['params']
        config['params'] = []
        for param in all_params:
            if param['name'] in param_select:
                config['params'].append(param)

    config['inputs'] = process_vars(ins, aliases, timedata)
    config['baseSeq'], _ = steps.readFiles(job['header'].get('init', [[]])[0])

    config['particles'] = int(job['header'].get('particles', [[NPARTICLES]])[0][0])
    config['nbatch'] = int(job['header'].get('nbatch', [[NBATCH]])[0][0])
    config['modelKernel'] = float(job['header'].get('modelKernel', [[MODELKERNEL]])[0][0])
    config['finalepsilon'] = float(job['header'].get('finalepsilon', [[FINALEPSILON]])[0][0])
    config['alpha'] = float(job['header'].get('alpha', [[ALPHA]])[0][0])
    config['beta'] = int(job['header'].get('beta', [[BETA]])[0][0])
    config['timeout'] = int(job['header'].get('timeout', [[model_bcmd.TIMEOUT]])[0][0])

    # ABC-SYSBIO distance funcs have the same names as ordinary ones but with suffix "Distance"
    # TODO: handle loglik sigma factory
    config['distance'] = getattr(
        distance, job['header'].get('distance', [[DISTANCE]])[0][0] + "Distance")

    return config