Ejemplo n.º 1
0
    if args.data and (len(args.data) != len(args.legend)):
        sys.exit('Number of legend items must equal the number of data files')
    elif args.behavioralData and (len(args.behavioralData) != len(
            args.legend)):
        sys.exit('Number of legend items must equal the number of data files')

    # Open data and align to observed
    obs_align = None
    data = []
    max_x = min_x = 0

    if args.data:
        # Open observed data
        obs_file = open(args.obs, 'r')
        (obs_datetime,
         obs_data) = RHESSysOutput.readObservedDataFromFile(obs_file,
                                                            readHour=False)
        obs_file.close()
        obs = pd.Series(obs_data, index=obs_datetime)

        for d in args.data:
            mod_file = open(d, 'r')
            (tmp_datetime,
             tmp_data) = RHESSysOutput.readColumnFromFile(mod_file,
                                                          args.column,
                                                          startHour=0)
            tmp_mod = pd.Series(tmp_data, index=tmp_datetime)
            # Align timeseries
            (mod_align, obs_align) = tmp_mod.align(obs, join='inner')
            tmp_max_x = max(mod_align.max(), obs_align.max())
            if tmp_max_x > max_x:
                max_x = tmp_max_x
Ejemplo n.º 2
0
        sys.exit('A secondary data file was specified, but the secondary column to use was not')
    
    if args.data and ( len(args.data) != len(args.legend) ):
        sys.exit('Number of legend items must equal the number of data files')
    elif args.behavioralData and ( len(args.behavioralData) != len(args.legend) ):
        sys.exit('Number of legend items must equal the number of data files')

    # Open data and align to observed
    obs_align = None
    data = []
    max_x = min_x = 0
    
    if args.data:
        # Open observed data
        obs_file = open(args.obs, 'r')
        (obs_datetime, obs_data) = RHESSysOutput.readObservedDataFromFile(obs_file,
                                                                          readHour=False)
        obs_file.close()
        obs = pd.Series(obs_data, index=obs_datetime)
        
        for d in args.data:
            mod_file = open(d, 'r')
            (tmp_datetime, tmp_data) = RHESSysOutput.readColumnFromFile(mod_file, args.column, startHour=0)
            tmp_mod = pd.Series(tmp_data, index=tmp_datetime)
            # Align timeseries
            (mod_align, obs_align) = tmp_mod.align(obs, join='inner')
            tmp_max_x = max(mod_align.max(), obs_align.max())
            if tmp_max_x > max_x:
                max_x = tmp_max_x
            min_x = max(min_x, mod_align.min())
        
            mod_file.close()
    def readBehavioralData(
        self, basedir, session_id, variable="streamflow", observed_file=None, behavioral_filter=None, end_date=None
    ):
        """Load observed data and per-run model output for a calibration session.

        The method validates that the calibration database, the output
        directory and the observed-data file are readable, looks up the
        session and its runs, asks the user on stdin to confirm, and then
        reads the requested output column of every run whose status is
        "DONE", aligning each series to the observed series.

        Arguments:
        basedir -- base directory of the calibration project; passed to the
                   RHESSysCalibrator path helpers
        session_id -- id of the session to read from the calibration database
        variable -- name of the model output column to read for each run
                    (default "streamflow")
        observed_file -- name of the observed-data file, joined onto the
                         observed-data directory; when not given, the
                         session's obs_filename is used
        behavioral_filter -- passed as where_clause to getRunsInSession
        end_date -- when given, the observed series is sliced as
                    obs[:end_date] before alignment

        Returns 0 if the user declines the confirmation prompt.  Otherwise
        returns the tuple (runsProcessed, obs, x, ysim, likelihood) where:
        runsProcessed -- True if at least one run was read
        obs -- observed pandas Series, as aligned to the last run read
        x -- list of datetime objects built from the index of the first run
             read, or None if no run was read
        ysim -- numpy array of shape (number of runs, series length) holding
                the aligned model output, or None if no run was read
        likelihood -- numpy array with run.nse for each run read

        Raises IOError (errno.EACCES) when the database, output directory or
        observed-data file is not readable, and Exception when the session is
        not found or contains no runs.
        """
        dbPath = RHESSysCalibrator.getDBPath(basedir)
        if not os.access(dbPath, os.R_OK):
            raise IOError(errno.EACCES, "The database at %s is not readable" % dbPath)
        self.logger.debug("DB path: %s", dbPath)

        outputPath = RHESSysCalibrator.getOutputPath(basedir)
        if not os.access(outputPath, os.R_OK):
            raise IOError(errno.EACCES, "The output directory %s is  not readable" % outputPath)
        self.logger.debug("Output path: %s", outputPath)

        rhessysPath = RHESSysCalibrator.getRhessysPath(basedir)

        calibratorDB = ModelRunnerDB(dbPath)

        # Make sure the session exists
        session = calibratorDB.getSession(session_id)
        if session is None:
            raise Exception("Session %d was not found in the calibration database %s" % (session_id, dbPath))
        if session.status != "complete":
            # Single parenthesised argument: prints the same text on Python 2
            # and is also valid Python 3 syntax.
            print("WARNING: session status is: %s.  Some model runs may not have completed." % (session.status,))
        else:
            self.logger.debug("Session status is: %s", session.status)

        # Determine observation file path
        if observed_file:
            obs_file = observed_file
        else:
            # Get observed file from session
            assert session.obs_filename is not None
            obs_file = session.obs_filename
        obsPath = RHESSysCalibrator.getObsPath(basedir)
        obsFilePath = os.path.join(obsPath, obs_file)
        if not os.access(obsFilePath, os.R_OK):
            raise IOError(errno.EACCES, "The observed data file %s is  not readable" % obsFilePath)
        self.logger.debug("Obs path: %s", obsFilePath)

        # Get runs in session
        runs = calibratorDB.getRunsInSession(session.id, where_clause=behavioral_filter)
        numRuns = len(runs)
        if numRuns == 0:
            raise Exception("No runs found for session %d" % (session.id,))
        response = raw_input(
            "%d runs selected for plotting from session %d in basedir '%s', continue? [yes | no] "
            % (numRuns, session_id, os.path.basename(basedir))
        )
        response = response.lower()
        if response not in ("y", "yes"):
            # Exit normally.  NOTE: callers receive 0 here, not the 5-tuple.
            return 0
        self.logger.debug("%d behavioral runs", numRuns)

        # Read observed data from file
        with open(obsFilePath, "r") as obsFile:
            (obs_datetime, obs_data) = RHESSysOutput.readObservedDataFromFile(obsFile)
        obs = pd.Series(obs_data, index=obs_datetime)
        if end_date:
            obs = obs[:end_date]

        # Lazy logger arguments: the data is only formatted when debug is on.
        self.logger.debug("Observed data: %s", obs_data)

        # NOTE: np.empty does not initialise memory.  Entries for runs that
        # are skipped below (status not "DONE", or unreadable output) are
        # never assigned and hold arbitrary values.
        likelihood = np.empty(numRuns)
        ysim = None
        x = None

        runsProcessed = False
        for (i, run) in enumerate(runs):
            if run.status != "DONE":
                continue

            runOutput = os.path.join(rhessysPath, run.output_path)
            self.logger.debug(">>>\nOutput dir of run %d is %s", run.id, runOutput)
            tmpOutfile = RHESSysCalibrator.getRunOutputFilePath(runOutput)
            if not os.access(tmpOutfile, os.R_OK):
                print(
                    "Output file %s for run %d not found or not readable, unable to calculate fitness statistics for this run"
                    % (tmpOutfile, run.id)
                )
                continue

            # The context manager closes the file even if reading fails.
            with open(tmpOutfile, "r") as tmpFile:
                # Read the column requested by the caller, not a fixed one.
                (tmp_datetime, tmp_data) = RHESSysOutput.readColumnFromFile(tmpFile, variable)
                tmp_mod = pd.Series(tmp_data, index=tmp_datetime)

            # Align timeseries to observed.  obs is replaced by its aligned
            # version, so later runs are aligned against the narrowed series.
            (mod, obs) = tmp_mod.align(obs, join="inner")

            # Stash dates for X values (assume they are the same for all runs)
            if x is None:
                x = [datetime.strptime(str(d), "%Y-%m-%d %H:%M:%S") for d in mod.index]

            # Put data in matrix
            dataLen = len(mod)
            # Identity test: "== None" on an ndarray is an elementwise
            # comparison and cannot be used as an if-condition.
            if ysim is None:
                # Allocate matrix for results
                ysim = np.empty((numRuns, dataLen))
            assert np.shape(ysim)[1] == dataLen
            ysim[i, :] = mod

            # Store fitness parameter
            likelihood[i] = run.nse

            runsProcessed = True

        return (runsProcessed, obs, x, ysim, likelihood)