Esempio n. 1
0
def plotGraph(args, obs, data, sizeX=1, sizeY=1, dpi=80):
    """Plot observed and modeled series on a newly created figure.

    Standard and log-Y plots draw every series against ``obs.index``.  CDF
    plots draw the empirical CDF of every series, evaluated on ``len(obs)``
    evenly spaced points between ``min_x`` and ``max_x``.  Those two names
    are not parameters: they are looked up outside this function and must be
    defined before a CDF plot is requested.

    Args:
        args: Options object.  Attributes read here: ``plottype``,
            ``supressObs``, ``linewidth``, ``linestyle``, ``color``,
            ``column``, ``title``, ``xlabel``, ``ylabel``, ``legend``,
            ``secondaryData``, ``secondaryColumn``, ``secondaryPlotType``
            and ``secondaryLabel``.  ``linewidth``, ``linestyle`` and
            ``color``, when set, are indexed by the position of the series
            in ``data``.
        obs: Observed series.  It is aligned with ``Series.align`` against
            the secondary data, so a pandas Series is expected.
        data: Sequence of modeled series to draw.
        sizeX: Figure width, forwarded as the first ``figsize`` entry.
        sizeY: Figure height, forwarded as the second ``figsize`` entry.
        dpi: Figure resolution forwarded to ``plt.figure``.

    Raises:
        ValueError: If ``args.plottype`` is not one of ``PLOT_TYPE_STD``,
            ``PLOT_TYPE_LOGY`` or ``PLOT_TYPE_CDF``.
    """
    is_cdf = args.plottype == PLOT_TYPE_CDF
    is_timeseries = (args.plottype == PLOT_TYPE_STD or
                     args.plottype == PLOT_TYPE_LOGY)
    if not (is_cdf or is_timeseries):
        # Fail early with a clear message instead of an unbound-variable
        # error further down.
        raise ValueError("Unsupported plot type: %r" % (args.plottype,))

    fig = plt.figure(figsize=(sizeX, sizeY), dpi=dpi, tight_layout=True)
    ax = fig.add_subplot(111)

    if is_timeseries:
        x = obs.index
    else:
        x = np.linspace(min_x, max_x, num=len(obs))

    # Plot observed values
    obs_plt = None
    if not args.supressObs:
        obs_y = sm.distributions.ECDF(obs)(x) if is_cdf else obs
        (obs_plt,) = ax.plot(x, obs_y, linewidth=2.0, color='black')

    # Plot modeled values
    data_plt = []
    for (i, d) in enumerate(data):
        mod_y = sm.distributions.ECDF(d)(x) if is_cdf else d

        plot_kwargs = {'linewidth': args.linewidth[i] if args.linewidth else 1.0}

        if args.linestyle:
            plot_kwargs['linestyle'] = LINE_TYPE_DICT[args.linestyle[i]]
        else:
            # Rotate styles.  The index becomes -1 when (i + 1) is a multiple
            # of NUM_LINE_TYPES, which selects the last entry of LINE_TYPES.
            styleIdx = ((i + 1) % NUM_LINE_TYPES) - 1
            plot_kwargs['linestyle'] = LINE_TYPE_DICT[LINE_TYPES[styleIdx]]

        # Only pass a color when one was requested so that matplotlib's own
        # color cycle is used otherwise.
        if args.color:
            plot_kwargs['color'] = args.color[i]

        (mod_plt,) = ax.plot(x, mod_y, **plot_kwargs)
        data_plt.append(mod_plt)

    # Plot annotations
    columnName = args.column.capitalize()
    if args.title:
        title = args.title
    elif args.plottype == PLOT_TYPE_STD:
        title = columnName
    elif args.plottype == PLOT_TYPE_LOGY:
        title = "log(%s)" % (columnName,)
    else:
        title = "Cummulative distribution - %s" % (columnName,)
    fig.suptitle(title, y=0.99)

    # X-axis
    if is_timeseries:
        # Choose the tick density from the approximate number of years shown.
        num_years = len(x) / 365
        if num_years > 10:
            locator = matplotlib.dates.YearLocator()
        elif num_years > 4:
            locator = matplotlib.dates.MonthLocator(interval=3)
        else:
            locator = matplotlib.dates.MonthLocator()
        ax.xaxis.set_major_locator(locator)
        ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%b-%Y'))
        # Rotate
        plt.setp(ax.xaxis.get_majorticklabels(), rotation=45)
        plt.setp(ax.xaxis.get_majorticklabels(), fontsize='x-small')

    if is_cdf:
        ax.set_xlim(min_x, max_x)
        ax.set_xscale('log')
        ax.set_xlabel(args.xlabel if args.xlabel else columnName)
    elif args.xlabel:
        ax.set_xlabel(args.xlabel)

    # Y-axis
    if args.plottype == PLOT_TYPE_LOGY:
        ax.set_yscale('log')

    if args.ylabel:
        ax.set_ylabel(args.ylabel)
    elif not is_cdf:
        y_label = columnName
        if args.plottype == PLOT_TYPE_LOGY:
            y_label = "log( %s )" % (columnName,)
        ax.set_ylabel(y_label)

    if args.supressObs:
        legend_items = args.legend
    else:
        data_plt.insert(0, obs_plt)
        legend_items = ['Observed'] + args.legend

    # Plot secondary data (if specified)
    if args.secondaryData and is_timeseries:
        # The context manager closes the file even when reading fails.
        with open(args.secondaryData, 'r') as sec_file:
            (sec_datetime, sec_data) = RHESSysOutput.readColumnFromFile(
                sec_file, args.secondaryColumn, startHour=0)
        sec = pd.Series(sec_data, index=sec_datetime)
        # Align timeseries; only the aligned secondary series is needed.
        (sec_align, _) = sec.align(obs, join='inner')
        # Plot against the aligned index: after the inner join the secondary
        # series can be shorter than obs, so obs.index may not match it.
        sec_x = sec_align.index
        ax2 = ax.twinx()
        if args.secondaryPlotType == 'line':
            ax2.plot(sec_x, sec_align)
        elif args.secondaryPlotType == 'bar':
            ax2.bar(sec_x, sec_align, facecolor='blue', edgecolor='none',
                    width=2.0)
        # Fall back to the capitalized column name when no label was given.
        secondaryLabel = args.secondaryColumn.capitalize()
        if args.secondaryLabel:
            secondaryLabel = args.secondaryLabel
        ax2.invert_yaxis()
        ax2.set_ylabel(secondaryLabel)
    # Disabled option kept from the original: to draw ax in front of ax2,
    # raise ax's z-order above ax2's and hide ax's background patch.

    # Plot legend last
    num_cols = len(data)
    if not args.supressObs:
        num_cols += 1

    # The CDF plot places the legend slightly lower than the other plots.
    legend_y = -0.015 if is_cdf else -0.01
    fig.legend(data_plt, legend_items, loc='lower center', fontsize='x-small',
               bbox_to_anchor=(0.5, legend_y), ncol=num_cols, frameon=False)
Esempio n. 2
0
 # Open data and align to observed.
 # For every file in args.data the requested column is read, aligned to the
 # observed series, and appended to `data`; max_x / min_x are updated from
 # the aligned values along the way.
 obs_align = None
 data = []
 max_x = min_x = 0
 
 if args.data:
     # Open observed data
     # NOTE(review): the file is closed only if the read succeeds; there is
     # no try/finally or `with` around it.
     obs_file = open(args.obs, 'r')
     (obs_datetime, obs_data) = RHESSysOutput.readObservedDataFromFile(obs_file,
                                                                       readHour=False)
     obs_file.close()
     obs = pd.Series(obs_data, index=obs_datetime)
     
     for d in args.data:
         mod_file = open(d, 'r')
         (tmp_datetime, tmp_data) = RHESSysOutput.readColumnFromFile(mod_file, args.column, startHour=0)
         tmp_mod = pd.Series(tmp_data, index=tmp_datetime)
         # Align timeseries (inner join: keep only index labels present in
         # both series).  obs_align is overwritten on every iteration, so
         # after the loop it holds the alignment with the last file only.
         (mod_align, obs_align) = tmp_mod.align(obs, join='inner')
         # Track the largest value seen in any aligned modeled/observed series.
         tmp_max_x = max(mod_align.max(), obs_align.max())
         if tmp_max_x > max_x:
             max_x = tmp_max_x
         # NOTE(review): this keeps the LARGEST of the per-file modeled minima
         # (starting from 0) and ignores the observed minimum -- confirm that
         # max() rather than min() is intended here.
         min_x = max(min_x, mod_align.min())
     
         mod_file.close()
         data.append( mod_align )
 elif args.behavioralData:
     
     # Open observed data (behavioral data has hour in it, so we need to read obs. data differently)
     obs_file = open(args.obs, 'r')
     (obs_datetime, obs_data) = RHESSysOutput.readObservedDataFromFile(obs_file,
Esempio n. 3
0
    # Value range across the aligned series; both bounds start at 0 and are
    # updated inside the loop below.
    max_x = min_x = 0

    if args.data:
        # Open observed data
        # NOTE(review): the file is closed only if the read succeeds; there
        # is no try/finally or `with` around it.
        obs_file = open(args.obs, 'r')
        (obs_datetime,
         obs_data) = RHESSysOutput.readObservedDataFromFile(obs_file,
                                                            readHour=False)
        obs_file.close()
        obs = pd.Series(obs_data, index=obs_datetime)

        # Read the requested column from every model output file, align it
        # to the observed series and collect the aligned series in `data`.
        for d in args.data:
            mod_file = open(d, 'r')
            (tmp_datetime,
             tmp_data) = RHESSysOutput.readColumnFromFile(mod_file,
                                                          args.column,
                                                          startHour=0)
            tmp_mod = pd.Series(tmp_data, index=tmp_datetime)
            # Align timeseries (inner join: keep only index labels present
            # in both series).  obs_align is overwritten on every iteration.
            (mod_align, obs_align) = tmp_mod.align(obs, join='inner')
            # Track the largest value seen in any aligned series.
            tmp_max_x = max(mod_align.max(), obs_align.max())
            if tmp_max_x > max_x:
                max_x = tmp_max_x
            # NOTE(review): this keeps the LARGEST of the per-file modeled
            # minima (starting from 0) and ignores the observed minimum --
            # confirm that max() rather than min() is intended here.
            min_x = max(min_x, mod_align.min())

            mod_file.close()
            data.append(mod_align)
    elif args.behavioralData:

        # Open observed data (behavioral data has hour in it, so we need to read obs. data differently)
        obs_file = open(args.obs, 'r')
Esempio n. 4
0
def plotGraph(args, obs, data, sizeX=1, sizeY=1, dpi=80):
    """Create a figure showing the observed series and all modeled series.

    For ``PLOT_TYPE_STD`` and ``PLOT_TYPE_LOGY`` the series are drawn
    against ``obs.index`` with date-formatted ticks.  For ``PLOT_TYPE_CDF``
    each series is replaced by its empirical CDF sampled at ``len(obs)``
    evenly spaced points between ``min_x`` and ``max_x``; these two names
    are read from outside the function and are not parameters.

    Args:
        args: Options object.  This function reads ``plottype``,
            ``supressObs``, ``linewidth``, ``linestyle``, ``color``,
            ``column``, ``title``, ``xlabel``, ``ylabel``, ``legend``,
            ``secondaryData``, ``secondaryColumn``, ``secondaryPlotType``
            and ``secondaryLabel``.  Per-series options (``linewidth``,
            ``linestyle``, ``color``) are indexed by series position.
        obs: Observed series (used with ``Series.align``, so a pandas
            Series is expected).
        data: Sequence of modeled series.
        sizeX: Figure width given to ``figsize``.
        sizeY: Figure height given to ``figsize``.
        dpi: Resolution given to ``plt.figure``.

    Raises:
        ValueError: If ``args.plottype`` is not a known plot type.
    """
    plot_type = args.plottype
    on_time_axis = plot_type == PLOT_TYPE_STD or plot_type == PLOT_TYPE_LOGY
    on_cdf_axis = plot_type == PLOT_TYPE_CDF
    if not on_time_axis and not on_cdf_axis:
        # Without this check an unknown type only surfaces later as an
        # unbound local variable.
        raise ValueError("Unsupported plot type: %r" % (plot_type, ))

    fig = plt.figure(figsize=(sizeX, sizeY), dpi=dpi, tight_layout=True)
    ax = fig.add_subplot(111)

    if on_cdf_axis:
        x = np.linspace(min_x, max_x, num=len(obs))
    else:
        x = obs.index

    def y_values(series):
        # CDF plots show the empirical CDF sampled on x; the other plot
        # types show the series unchanged.
        if on_cdf_axis:
            return sm.distributions.ECDF(series)(x)
        return series

    # Plot observed values
    obs_plt = None
    if not args.supressObs:
        (obs_plt, ) = ax.plot(x, y_values(obs), linewidth=2.0, color='black')

    # Plot modeled values
    data_plt = []
    for (i, d) in enumerate(data):
        if args.linewidth:
            linewidth = args.linewidth[i]
        else:
            linewidth = 1.0

        if args.linestyle:
            style_key = args.linestyle[i]
        else:
            # Rotate styles; an index of -1 (reached when i + 1 is a
            # multiple of NUM_LINE_TYPES) picks the last LINE_TYPES entry.
            style_key = LINE_TYPES[((i + 1) % NUM_LINE_TYPES) - 1]
        linestyle = LINE_TYPE_DICT[style_key]

        # Leave the color to matplotlib's cycle unless one was requested.
        extra = {'color': args.color[i]} if args.color else {}
        (mod_plt, ) = ax.plot(x,
                              y_values(d),
                              linewidth=linewidth,
                              linestyle=linestyle,
                              **extra)
        data_plt.append(mod_plt)

    # Plot annotations
    columnName = args.column.capitalize()
    title = args.title
    if not title:
        if plot_type == PLOT_TYPE_STD:
            title = columnName
        elif plot_type == PLOT_TYPE_LOGY:
            title = "log(%s)" % (columnName, )
        else:
            title = "Cummulative distribution - %s" % (columnName, )
    fig.suptitle(title, y=0.99)

    # X-axis
    if on_time_axis:
        # Tick spacing depends on the approximate number of years plotted.
        num_years = len(x) / 365
        if num_years > 4:
            if num_years > 10:
                ax.xaxis.set_major_locator(matplotlib.dates.YearLocator())
            else:
                ax.xaxis.set_major_locator(
                    matplotlib.dates.MonthLocator(interval=3))
        else:
            ax.xaxis.set_major_locator(matplotlib.dates.MonthLocator())
        ax.xaxis.set_major_formatter(matplotlib.dates.DateFormatter('%b-%Y'))
        # Rotate
        tick_labels = ax.xaxis.get_majorticklabels()
        plt.setp(tick_labels, rotation=45)
        plt.setp(tick_labels, fontsize='x-small')

    if on_cdf_axis:
        ax.set_xlim(min_x, max_x)
        ax.set_xscale('log')
        # The CDF x-axis shows the data values, so it defaults to the
        # column name when no label is given.
        ax.set_xlabel(args.xlabel or columnName)
    elif args.xlabel:
        ax.set_xlabel(args.xlabel)

    # Y-axis
    if plot_type == PLOT_TYPE_LOGY:
        ax.set_yscale('log')

    if args.ylabel:
        ax.set_ylabel(args.ylabel)
    elif not on_cdf_axis:
        if plot_type == PLOT_TYPE_LOGY:
            ax.set_ylabel("log( %s )" % (columnName, ))
        else:
            ax.set_ylabel(columnName)

    if args.supressObs:
        legend_items = args.legend
    else:
        data_plt.insert(0, obs_plt)
        legend_items = ['Observed'] + args.legend

    # Plot secondary data (if specified)
    if args.secondaryData and on_time_axis:
        # `with` guarantees the file is closed even if reading raises.
        with open(args.secondaryData, 'r') as sec_file:
            (sec_datetime,
             sec_data) = RHESSysOutput.readColumnFromFile(sec_file,
                                                          args.secondaryColumn,
                                                          startHour=0)
        sec = pd.Series(sec_data, index=sec_datetime)
        # Align timeseries; the aligned copy of obs is not needed.
        sec_align = sec.align(obs, join='inner')[0]
        # Plot on a twin axis, using the aligned index so that the x and y
        # lengths always agree (the inner join may drop dates present in
        # obs.index).
        ax2 = ax.twinx()
        if args.secondaryPlotType == 'line':
            ax2.plot(sec_align.index, sec_align)
        elif args.secondaryPlotType == 'bar':
            ax2.bar(sec_align.index,
                    sec_align,
                    facecolor='blue',
                    edgecolor='none',
                    width=2.0)
        # Use the explicit label if given, else the capitalized column name.
        secondaryLabel = args.secondaryColumn.capitalize()
        if args.secondaryLabel:
            secondaryLabel = args.secondaryLabel
        ax2.invert_yaxis()
        ax2.set_ylabel(secondaryLabel)
    # Disabled option kept from the original: to put ax in front of ax2,
    # raise ax's z-order above ax2's and hide ax's background patch.

    # Plot legend last
    num_cols = len(data)
    if not args.supressObs:
        num_cols += 1

    # Only the vertical anchor differs between CDF and time-series plots.
    if on_cdf_axis:
        anchor = (0.5, -0.015)
    else:
        anchor = (0.5, -0.01)
    fig.legend(data_plt,
               legend_items,
               loc='lower center',
               fontsize='x-small',
               bbox_to_anchor=anchor,
               ncol=num_cols,
               frameon=False)
    def readBehavioralData(
        self, basedir, session_id, variable="streamflow", observed_file=None, behavioral_filter=None, end_date=None
    ):
        """Collect observed data and simulated output for a session's runs.

        The runs of the session (optionally restricted by
        ``behavioral_filter``) are looked up in the calibration database.
        The user is asked on the console to confirm before any run output
        is read.  Each run whose status is ``"DONE"`` and whose output file
        is readable is aligned to the observed series and stored as one row
        of the result matrix.

        Args:
            basedir: Base directory handed to the ``RHESSysCalibrator`` path
                helpers to locate the database, output, RHESSys and
                observed-data directories.
            session_id: Identifier of the calibration session to load.
            variable: Name of the column read from each run's output file.
            observed_file: Name of the observed-data file inside the
                observed-data directory.  When falsy, the file name stored
                on the session is used.
            behavioral_filter: Passed as ``where_clause`` when the runs of
                the session are fetched.
            end_date: When truthy, the observed series is sliced with
                ``obs[:end_date]`` before use.

        Returns:
            ``0`` if the user does not answer "y" or "yes" at the prompt.
            Otherwise a tuple ``(runsProcessed, obs, x, ysim, likelihood)``:
            ``runsProcessed`` is True if at least one run was read;
            ``obs`` is the observed series after alignment with every
            processed run; ``x`` is a list of ``datetime`` objects built
            from the index of the first processed run (None if no run was
            processed); ``ysim`` has one row per run (None if no run was
            processed); ``likelihood`` holds ``run.nse`` per run.

        Raises:
            IOError: If the database, the output directory or the observed
                data file is not readable.
            Exception: If the session does not exist or has no runs.
        """
        dbPath = RHESSysCalibrator.getDBPath(basedir)
        if not os.access(dbPath, os.R_OK):
            raise IOError(errno.EACCES, "The database at %s is not readable" % dbPath)
        self.logger.debug("DB path: %s" % dbPath)

        outputPath = RHESSysCalibrator.getOutputPath(basedir)
        if not os.access(outputPath, os.R_OK):
            raise IOError(errno.EACCES, "The output directory %s is  not readable" % outputPath)
        self.logger.debug("Output path: %s" % outputPath)

        rhessysPath = RHESSysCalibrator.getRhessysPath(basedir)

        calibratorDB = ModelRunnerDB(dbPath)

        # Make sure the session exists
        session = calibratorDB.getSession(session_id)
        if session is None:
            raise Exception("Session %d was not found in the calibration database %s" % (session_id, dbPath))
        if session.status != "complete":
            # Single parenthesized argument: valid as a Python 2 print
            # statement and as a Python 3 print call.
            print("WARNING: session status is: %s.  Some model runs may not have completed." % (session.status,))
        else:
            self.logger.debug("Session status is: %s" % (session.status,))

        # Determine observation file path
        if observed_file:
            obs_file = observed_file
        else:
            # Get observed file from session
            assert session.obs_filename is not None
            obs_file = session.obs_filename
        obsPath = RHESSysCalibrator.getObsPath(basedir)
        obsFilePath = os.path.join(obsPath, obs_file)
        if not os.access(obsFilePath, os.R_OK):
            raise IOError(errno.EACCES, "The observed data file %s is  not readable" % obsFilePath)
        self.logger.debug("Obs path: %s" % obsFilePath)

        # Get runs in session
        runs = calibratorDB.getRunsInSession(session.id, where_clause=behavioral_filter)
        numRuns = len(runs)
        if numRuns == 0:
            raise Exception("No runs found for session %d" % (session.id,))
        response = raw_input(
            "%d runs selected for plotting from session %d in basedir '%s', continue? [yes | no] "
            % (numRuns, session_id, os.path.basename(basedir))
        )
        if response.lower() not in ("y", "yes"):
            # Exit normally
            return 0
        self.logger.debug("%d behavioral runs" % (numRuns,))

        # Read observed data from file; `with` closes it even on error
        with open(obsFilePath, "r") as obsFile:
            (obs_datetime, obs_data) = RHESSysOutput.readObservedDataFromFile(obsFile)
        obs = pd.Series(obs_data, index=obs_datetime)
        if end_date:
            obs = obs[:end_date]

        self.logger.debug("Observed data: %s" % (obs_data,))

        # NOTE(review): np.empty leaves entries uninitialized, so the rows
        # and likelihood slots of runs skipped below hold arbitrary values.
        likelihood = np.empty(numRuns)
        ysim = None
        x = None

        runsProcessed = False
        for (i, run) in enumerate(runs):
            if "DONE" != run.status:
                continue

            runOutput = os.path.join(rhessysPath, run.output_path)
            self.logger.debug(">>>\nOutput dir of run %d is %s" % (run.id, runOutput))
            tmpOutfile = RHESSysCalibrator.getRunOutputFilePath(runOutput)
            if not os.access(tmpOutfile, os.R_OK):
                print(
                    "Output file %s for run %d not found or not readable, unable to calculate fitness statistics for this run"
                    % (tmpOutfile, run.id)
                )
                continue

            # Read the requested column (the `variable` argument, which
            # defaults to "streamflow").
            with open(tmpOutfile, "r") as tmpFile:
                (tmp_datetime, tmp_data) = RHESSysOutput.readColumnFromFile(tmpFile, variable)
            tmp_mod = pd.Series(tmp_data, index=tmp_datetime)
            # Align timeseries to observed.  obs is replaced by its aligned
            # version, so it can only shrink as more runs are processed.
            (mod, obs) = tmp_mod.align(obs, join="inner")

            # Stash dates for X values (assume they are the same for all runs)
            if x is None:
                x = [datetime.strptime(str(d), "%Y-%m-%d %H:%M:%S") for d in mod.index]

            # Put data in matrix
            dataLen = len(mod)
            if ysim is None:
                # Allocate matrix for results.  `is None` is required here:
                # `== None` on an ndarray compares element by element.
                ysim = np.empty((numRuns, dataLen))
            assert np.shape(ysim)[1] == dataLen
            ysim[i, :] = mod

            # Store fitness parameter
            likelihood[i] = run.nse

            runsProcessed = True

        return (runsProcessed, obs, x, ysim, likelihood)