# Each plotted series needs exactly one legend entry; bail out early otherwise.
if args.data and (len(args.data) != len(args.legend)):
    sys.exit('Number of legend items must equal the number of data files')
elif args.behavioralData and (len(args.behavioralData) != len(args.legend)):
    sys.exit('Number of legend items must equal the number of data files')

# Open data and align to observed
obs_align = None
data = []
max_x = min_x = 0
if args.data:
    # Open observed data; the context manager closes the file even if the
    # reader raises.
    with open(args.obs, 'r') as obs_file:
        (obs_datetime, obs_data) = RHESSysOutput.readObservedDataFromFile(obs_file, readHour=False)
    obs = pd.Series(obs_data, index=obs_datetime)

    for d in args.data:
        # Read the requested column and build the series while the file is
        # open, then release the handle before any further processing.
        with open(d, 'r') as mod_file:
            (tmp_datetime, tmp_data) = RHESSysOutput.readColumnFromFile(mod_file, args.column, startHour=0)
            tmp_mod = pd.Series(tmp_data, index=tmp_datetime)

        # Align timeseries: keep only timestamps present in both series
        (mod_align, obs_align) = tmp_mod.align(obs, join='inner')

        # Track the largest value seen across all modeled and observed series
        tmp_max_x = max(mod_align.max(), obs_align.max())
        if tmp_max_x > max_x:
            max_x = tmp_max_x
# NOTE(review): the call below is the body of a guard whose `if` header lies
# above the visible part of this file. It is reproduced verbatim; confirm it
# is indented under that header when this fragment is merged back.
sys.exit('A secondary data file was specified, but the secondary column to use was not')

# Each plotted series needs exactly one legend entry; bail out early otherwise.
if args.data and (len(args.data) != len(args.legend)):
    sys.exit('Number of legend items must equal the number of data files')
elif args.behavioralData and (len(args.behavioralData) != len(args.legend)):
    sys.exit('Number of legend items must equal the number of data files')

# Open data and align to observed
obs_align = None
data = []
max_x = min_x = 0
if args.data:
    # Open observed data; the context manager closes the file even if the
    # reader raises.
    with open(args.obs, 'r') as obs_file:
        (obs_datetime, obs_data) = RHESSysOutput.readObservedDataFromFile(obs_file, readHour=False)
    obs = pd.Series(obs_data, index=obs_datetime)

    for d in args.data:
        # Read the requested column and build the series while the file is
        # open, then release the handle before any further processing.
        with open(d, 'r') as mod_file:
            (tmp_datetime, tmp_data) = RHESSysOutput.readColumnFromFile(mod_file, args.column, startHour=0)
            tmp_mod = pd.Series(tmp_data, index=tmp_datetime)

        # Align timeseries: keep only timestamps present in both series
        (mod_align, obs_align) = tmp_mod.align(obs, join='inner')

        # Track the largest value seen across all modeled and observed series
        tmp_max_x = max(mod_align.max(), obs_align.max())
        if tmp_max_x > max_x:
            max_x = tmp_max_x

        # NOTE(review): min_x starts at 0 and is combined with max(), so it
        # holds the largest per-file minimum (never below 0) rather than the
        # overall minimum. Left unchanged; confirm this is intended.
        min_x = max(min_x, mod_align.min())
def readBehavioralData(
    self, basedir, session_id, variable="streamflow", observed_file=None, behavioral_filter=None, end_date=None
):
    """Load observed data and the simulated series of each finished run in a session.

    The user is asked interactively (via ``raw_input``) to confirm before any
    model output is read.

    Arguments:
    basedir -- calibration base directory; the database, output, RHESSys and
               observation paths are all derived from it
    session_id -- id of the calibration session to read
    variable -- name of the column to read from each run's output file
    observed_file -- observed data file name, relative to the observation
                     directory; when not given, the file name recorded in the
                     session is used
    behavioral_filter -- passed as ``where_clause`` when fetching the runs of
                         the session
    end_date -- when given, the observed series is truncated with
                ``obs[:end_date]``

    Returns 0 if the user declines to continue.  Otherwise returns the tuple
    (runsProcessed, obs, x, ysim, likelihood) where
    runsProcessed -- True if at least one run was read
    obs -- observed series, aligned to the simulated series that were read
    x -- list of datetime objects taken from the first run read (None if no
         run was read)
    ysim -- array of shape (number of runs, series length), one row per run
            (None if no run was read)
    likelihood -- array holding the ``nse`` value of each run
    Rows of ysim and entries of likelihood that belong to runs that were
    skipped (status not "DONE", or output file unreadable) are NaN.

    Raises IOError if the database, output directory or observed data file is
    not readable, and Exception if the session does not exist or has no runs.
    """
    dbPath = RHESSysCalibrator.getDBPath(basedir)
    if not os.access(dbPath, os.R_OK):
        raise IOError(errno.EACCES, "The database at %s is not readable" % dbPath)
    self.logger.debug("DB path: %s" % dbPath)

    outputPath = RHESSysCalibrator.getOutputPath(basedir)
    if not os.access(outputPath, os.R_OK):
        raise IOError(errno.EACCES, "The output directory %s is not readable" % outputPath)
    self.logger.debug("Output path: %s" % outputPath)

    rhessysPath = RHESSysCalibrator.getRhessysPath(basedir)

    calibratorDB = ModelRunnerDB(RHESSysCalibrator.getDBPath(basedir))

    # Make sure the session exists
    session = calibratorDB.getSession(session_id)
    if session is None:
        raise Exception("Session %d was not found in the calibration database %s" % (session_id, dbPath))
    if session.status != "complete":
        # Single-argument print call: behaves the same as the print statement
        print("WARNING: session status is: %s.  Some model runs may not have completed." % (session.status,))
    else:
        self.logger.debug("Session status is: %s" % (session.status,))

    # Determine observation file path
    if observed_file:
        obs_file = observed_file
    else:
        # Get observed file from session
        assert session.obs_filename is not None
        obs_file = session.obs_filename
    obsPath = RHESSysCalibrator.getObsPath(basedir)
    obsFilePath = os.path.join(obsPath, obs_file)
    if not os.access(obsFilePath, os.R_OK):
        raise IOError(errno.EACCES, "The observed data file %s is not readable" % obsFilePath)
    self.logger.debug("Obs path: %s" % obsFilePath)

    # Get runs in session
    runs = calibratorDB.getRunsInSession(session.id, where_clause=behavioral_filter)
    numRuns = len(runs)
    if numRuns == 0:
        raise Exception("No runs found for session %d" % (session.id,))

    # Ask for confirmation before reading every run's output
    response = raw_input(
        "%d runs selected for plotting from session %d in basedir '%s', continue? [yes | no] "
        % (numRuns, session_id, os.path.basename(basedir))
    )
    if response.lower() not in ("y", "yes"):
        # Exit normally
        return 0

    self.logger.debug("%d behavioral runs" % (numRuns,))

    # Read observed data from file; the handle is closed even if parsing fails
    with open(obsFilePath, "r") as obsFile:
        (obs_datetime, obs_data) = RHESSysOutput.readObservedDataFromFile(obsFile)
    obs = pd.Series(obs_data, index=obs_datetime)
    if end_date:
        obs = obs[:end_date]

    self.logger.debug("Observed data: %s" % obs_data)

    # NaN-initialised so entries of skipped runs are recognisable instead of
    # holding whatever happened to be in memory.
    likelihood = np.full(numRuns, np.nan)
    ysim = None
    x = None

    runsProcessed = False
    for (i, run) in enumerate(runs):
        if run.status != "DONE":
            continue

        runOutput = os.path.join(rhessysPath, run.output_path)
        self.logger.debug(">>>\nOutput dir of run %d is %s" % (run.id, runOutput))
        tmpOutfile = RHESSysCalibrator.getRunOutputFilePath(runOutput)
        if not os.access(tmpOutfile, os.R_OK):
            print(
                "Output file %s for run %d not found or not readable, unable to calculate fitness statistics for this run"
                % (tmpOutfile, run.id)
            )
            continue

        # Read the requested column and build the series while the file is
        # open; the handle is released even if reading fails.
        with open(tmpOutfile, "r") as tmpFile:
            (tmp_datetime, tmp_data) = RHESSysOutput.readColumnFromFile(tmpFile, variable)
            tmp_mod = pd.Series(tmp_data, index=tmp_datetime)

        # Align timeseries to observed.  obs is replaced by its aligned
        # version, so later runs are aligned against the narrowed series.
        (mod, obs) = tmp_mod.align(obs, join="inner")

        # Stash dates for X values (assume they are the same for all runs)
        if x is None:
            x = [datetime.strptime(str(d), "%Y-%m-%d %H:%M:%S") for d in mod.index]

        # Put data in matrix
        dataLen = len(mod)
        # Identity test: comparing an ndarray to None with == is element-wise
        if ysim is None:
            # Allocate matrix for results
            ysim = np.full((numRuns, dataLen), np.nan)
        assert np.shape(ysim)[1] == dataLen
        ysim[i, :] = mod

        # Store fitness parameter
        likelihood[i] = run.nse

        runsProcessed = True

    return (runsProcessed, obs, x, ysim, likelihood)