Example #1
0
 def getObsStatsForDate(self, platform, obsName, uom, sOrder, beginDate, endDate):
   obsStats = stats()
   sensorID = self.dbConnection.sensorExists(obsName, uom, platform, sOrder);
   if(sensorID != None and sensorID != -1):
     sql = "SELECT m_date,m_value FROM multi_obs WHERE (m_date >= '%s' AND m_date <= '%s') AND sensor_id=%d"\
           %(beginDate,endDate,sensorID)
     dbCursor = self.dbConnection.executeQuery(sql)
     if(dbCursor != None):
       for row in dbCursor:
         m_value = float(row['m_value'])
         obsStats.addValue(m_value)
       #Calculate the statistics.
       obsStats.doCalculations()
     dbCursor.close() 
   return(obsStats)  
Example #2
0
 def getObsStatsForDate(self, platform, obsName, uom, sOrder, beginDate, endDate):
     obsStats = stats()
     sensorID = self.dbConnection.sensorExists(obsName, uom, platform, sOrder)
     if sensorID != None and sensorID != -1:
         sql = "SELECT m_date,m_value FROM multi_obs WHERE (m_date >= '%s' AND m_date <= '%s') AND sensor_id=%d" % (
             beginDate,
             endDate,
             sensorID,
         )
         dbCursor = self.dbConnection.executeQuery(sql)
         if dbCursor != None:
             for row in dbCursor:
                 m_value = float(row["m_value"])
                 obsStats.addValue(m_value)
             # Calculate the statistics.
             obsStats.doCalculations()
         dbCursor.close()
     return obsStats
Example #3
0
    def computeMonthlyDataPoints(self, platformList, beginYear, endYear, QAQCFlags, outputFilePath, writeRawDataPoints):
        import calendar

        # If we want to use the qc_level to determine which data to include, let's build the SQL for this.
        # qaqcWHERE = ''
        # if(len(QAQCFlags)):
        #  for qcLevel in QAQCFlags:
        #    if(len(qaqcWHERE)):
        #      qaqcWHERE += 'OR '
        #    qaqcWHERE += "qc_level=%d " % (qcLevel)
        #  qaqcWHERE = "AND (%s)" %(qaqcWHERE)
        for platformHandle in platformList:
            # Get all the observations on the platform
            platformNfoCur = self.dbConnection.getPlatformInfo(platformHandle)
            # Platform doesn't seem to exist, so move on.
            if platformNfoCur == None:
                continue
            platformNfo = platformNfoCur.fetchone()
            platformID = int(platformNfo["row_id"])
            platformNfoCur.close()

            sql = (
                "SELECT\
            obs_type.standard_name \
            ,uom_type.standard_name as uom \
            ,sensor.row_id as sensor_id\
            ,sensor.m_type_id as m_type_id\
            ,sensor.s_order as s_order\
          FROM sensor \
            left join m_type on m_type.row_id=sensor.m_type_id \
            left join m_scalar_type on m_scalar_type.row_id=m_type.m_scalar_type_id \
            left join obs_type on obs_type.row_id=m_scalar_type.obs_type_id \
            left join uom_type on uom_type.row_id=m_scalar_type.uom_type_id \
            WHERE sensor.platform_id = %d ORDER BY obs_type.standard_name ASC"
                % (platformID)
            )

            sensorCur = self.dbConnection.executeQuery(sql)
            # No sensors available on the platform.
            if sensorCur == None:
                continue
            sensorNfo = recursivedefaultdict()
            for row in sensorCur:
                sensorNfo[row["standard_name"]]["uom"] = row["uom"]
                sensorNfo[row["standard_name"]]["sensor_id"] = int(row["sensor_id"])
                sensorNfo[row["standard_name"]]["m_type_id"] = int(row["m_type_id"])
                sensorNfo[row["standard_name"]]["sorder"] = int(row["s_order"])
            sensorCur.close()

            outputFile = "%s/%s-yearly-stats-%s_%s.csv" % (outputFilePath, platformHandle, beginYear, endYear)
            statsFile = open(outputFile, "w")
            statsFile.write("Observation,StartDate,EndDate,Min,Max,Average,StdDev,90thPercentile,TotalRecordCount\n")
            rawDataPoints = None
            if writeRawDataPoints:
                outputFile = "%s/%s-yearly-raw.csv" % (outputFilePath, platformHandle)
                rawDataPoints = open(outputFile, "w")
            if rawDataPoints != None:
                rawDataPoints.write("Observation,StartDate,EndDate,Data\n")

            yearList = []
            if beginYear == None:
                # Get the distinct years
                if self.dbConnection.dbType == dbTypes.PostGRES:
                    sql = (
                        "SELECT DISTINCT(EXTRACT(YEAR FROM m_date)) as year FROM multi_obs WHERE platform_handle='%s'"
                        % (platformHandle)
                    )
                else:
                    sql = (
                        "SELECT DISTINCT(strftime('%%Y', m_date)) as year FROM multi_obs WHERE platform_handle='%s'"
                        % (platformHandle)
                    )
                dbCursor = self.dbConnection.executeQuery(sql)
                if dbCursor != None:
                    for row in dbCursor:
                        yearList.append(int(row["year"]))
                    dbCursor.close()
            else:
                for i in range(beginYear, endYear + 1):
                    yearList.append(i)

            # This is a dictionary we use to hold all the months of data for the years. We use it as a collection
            # of stats() objects so we can calculate some overall stats for each month over the years.
            # obsOverallMonthStats = recursivedefaultdict()
            print("Processing: %s" % (platformHandle))
            for year in yearList:
                for obsName in sensorNfo:
                    uom = sensorNfo[obsName]["uom"]
                    sensorID = sensorNfo[obsName]["sensor_id"]
                    mTypeID = sensorNfo[obsName]["m_type_id"]
                    # sOrder = sensorNfo[obsName]['sorder']
                    # Now for each month, we calc stats on the data.
                    for month in range(1, 13):
                        print("Obs: %s(%s) Year: %d Month: %d" % (obsName, uom, year, month))
                        monthStats = stats()
                        dayCnt = calendar.monthrange(year, month)
                        startDate = "%d-%02d-%02dT00:00:00" % (year, month, 1)
                        endDate = "%d-%02d-%2dT24:00:00" % (year, month, dayCnt[1])
                        if rawDataPoints != None:
                            rawDataPoints.write("%s,%s,%s" % (obsName, startDate, endDate))

                        # mTypeID = self.dbConnection.getMTypeFromObsName(obsName, uom, platformHandle, sOrder)
                        # sql = "SELECT m_date,m_value FROM multi_obs WHERE (m_date >= '%s' AND m_date <= '%s')\
                        #       AND sensor_id=%d %s;"\
                        #      %(startDate,endDate,sensorID,qaqcWHERE)
                        sql = (
                            "SELECT m_date,m_value,qc_level FROM multi_obs WHERE (m_date >= '%s' AND m_date <= '%s')\
                   AND sensor_id=%d;"
                            % (startDate, endDate, sensorID)
                        )
                        dbCursor = self.dbConnection.executeQuery(sql)
                        if dbCursor != None:
                            for row in dbCursor:
                                goodVal = False
                                # Use all data.
                                if len(QAQCFlags) == 0:
                                    goodVal = True
                                elif row["qc_level"] != None:
                                    for qaqcFlag in QAQCFlags:
                                        if qaqcFlag == row["qc_level"]:
                                            goodVal = True
                                            break
                                if goodVal:
                                    m_value = row["m_value"]
                                    if m_value != None:
                                        m_value = float(m_value)
                                        monthStats.addValue(m_value)
                                        if rawDataPoints != None:
                                            rawDataPoints.write(",%f" % (m_value))

                            monthStats.doCalculations()
                            avg = monthStats.average
                            if avg == None:
                                avg = -1.0
                            stdDev = monthStats.stdDev
                            if stdDev == None:
                                stdDev = -1.0
                            popStdDev = monthStats.populationStdDev
                            if popStdDev == None:
                                popStdDev = -1.0

                            UpperPercentile = monthStats.getValueAtPercentile(90)
                            if UpperPercentile == None:
                                UpperPercentile = -1.0
                            min = monthStats.minVal
                            if min == None:
                                min = -1.0
                            max = monthStats.maxVal
                            if max == None:
                                max = -1.0

                            statsFile.write(
                                "%s,%s,%s,%f,%f,%f,%f,%f,%d\n"
                                % (
                                    obsName,
                                    startDate,
                                    endDate,
                                    min,
                                    max,
                                    avg,
                                    stdDev,
                                    UpperPercentile,
                                    len(monthStats.items),
                                )
                            )
                            if rawDataPoints != None:
                                rawDataPoints.write("\n")
                        else:
                            i = 0
            statsFile.close()
            if rawDataPoints != None:
                rawDataPoints.close()
Example #4
0
  def computeMonthlyDataPoints(self, platformList, beginYear, endYear, QAQCFlags, outputFilePath,writeRawDataPoints):
    import calendar
    
    #If we want to use the qc_level to determine which data to include, let's build the SQL for this.
    #qaqcWHERE = ''
    #if(len(QAQCFlags)):
    #  for qcLevel in QAQCFlags:
    #    if(len(qaqcWHERE)):
    #      qaqcWHERE += 'OR '
    #    qaqcWHERE += "qc_level=%d " % (qcLevel)
    #  qaqcWHERE = "AND (%s)" %(qaqcWHERE)
    for platformHandle in platformList:
      #Get all the observations on the platform
      platformNfoCur = self.dbConnection.getPlatformInfo(platformHandle)
      #Platform doesn't seem to exist, so move on.
      if(platformNfoCur == None):
        continue
      platformNfo = platformNfoCur.fetchone()
      platformID = int(platformNfo['row_id'])
      platformNfoCur.close()
            
      sql= "SELECT\
            obs_type.standard_name \
            ,uom_type.standard_name as uom \
            ,sensor.row_id as sensor_id\
            ,sensor.m_type_id as m_type_id\
            ,sensor.s_order as s_order\
          FROM sensor \
            left join m_type on m_type.row_id=sensor.m_type_id \
            left join m_scalar_type on m_scalar_type.row_id=m_type.m_scalar_type_id \
            left join obs_type on obs_type.row_id=m_scalar_type.obs_type_id \
            left join uom_type on uom_type.row_id=m_scalar_type.uom_type_id \
            WHERE sensor.platform_id = %d ORDER BY obs_type.standard_name ASC"\
            %(platformID)
      
      sensorCur = self.dbConnection.executeQuery(sql)
      #No sensors available on the platform.
      if(sensorCur == None):
        continue
      sensorNfo = recursivedefaultdict()
      for row in sensorCur:       
        sensorNfo[row['standard_name']]['uom'] = row['uom']
        sensorNfo[row['standard_name']]['sensor_id'] = int(row['sensor_id'])
        sensorNfo[row['standard_name']]['m_type_id'] = int(row['m_type_id'])
        sensorNfo[row['standard_name']]['sorder'] = int(row['s_order'])
      sensorCur.close()
      
      outputFile = "%s/%s-yearly-stats-%s_%s.csv" %(outputFilePath,platformHandle,beginYear,endYear)
      statsFile = open(outputFile,'w')
      statsFile.write('Observation,StartDate,EndDate,Min,Max,Average,StdDev,90thPercentile,TotalRecordCount\n')
      rawDataPoints = None
      if(writeRawDataPoints):
        outputFile = "%s/%s-yearly-raw.csv" %(outputFilePath,platformHandle)
        rawDataPoints = open(outputFile, 'w')
      if(rawDataPoints != None):
        rawDataPoints.write('Observation,StartDate,EndDate,Data\n')
        
      yearList = []
      if(beginYear == None):
        # Get the distinct years
        if(self.dbConnection.dbType == dbTypes.PostGRES):
          sql = "SELECT DISTINCT(EXTRACT(YEAR FROM m_date)) as year FROM multi_obs WHERE platform_handle='%s'" %(platformHandle)
        else:
          sql = "SELECT DISTINCT(strftime('%%Y', m_date)) as year FROM multi_obs WHERE platform_handle='%s'" %(platformHandle)
        dbCursor = self.dbConnection.executeQuery(sql)
        if(dbCursor != None):
          for row in dbCursor:
            yearList.append(int(row['year']))
          dbCursor.close()
      else:
        for i in range(beginYear, endYear+1):
          yearList.append(i)
          
      #This is a dictionary we use to hold all the months of data for the years. We use it as a collection
      #of stats() objects so we can calculate some overall stats for each month over the years.
      #obsOverallMonthStats = recursivedefaultdict()
      print("Processing: %s" % (platformHandle))      
      for year in yearList:
        for obsName in sensorNfo:       
          uom = sensorNfo[obsName]['uom']
          sensorID = sensorNfo[obsName]['sensor_id']
          mTypeID = sensorNfo[obsName]['m_type_id']
          #sOrder = sensorNfo[obsName]['sorder']
          #Now for each month, we calc stats on the data.
          for month in range( 1,13 ):
            print("Obs: %s(%s) Year: %d Month: %d" %(obsName, uom, year, month))       
            monthStats = stats()
            dayCnt = calendar.monthrange(year, month)
            startDate = "%d-%02d-%02dT00:00:00" %(year,month,1)
            endDate = "%d-%02d-%2dT24:00:00" %(year,month,dayCnt[1])
            if(rawDataPoints != None):
              rawDataPoints.write("%s,%s,%s" %(obsName,startDate,endDate))

            
            #mTypeID = self.dbConnection.getMTypeFromObsName(obsName, uom, platformHandle, sOrder)
            #sql = "SELECT m_date,m_value FROM multi_obs WHERE (m_date >= '%s' AND m_date <= '%s')\
            #       AND sensor_id=%d %s;"\
            #      %(startDate,endDate,sensorID,qaqcWHERE)
            sql = "SELECT m_date,m_value,qc_level FROM multi_obs WHERE (m_date >= '%s' AND m_date <= '%s')\
                   AND sensor_id=%d;"\
                  %(startDate,endDate,sensorID)
            dbCursor = self.dbConnection.executeQuery(sql)
            if(dbCursor != None):
              for row in dbCursor:
                goodVal = False
                #Use all data.
                if(len(QAQCFlags) == 0):
                  goodVal = True                
                elif(row['qc_level'] != None):
                  for qaqcFlag in QAQCFlags:
                    if(qaqcFlag == row['qc_level']):
                      goodVal = True
                      break                
                if(goodVal):
                  m_value = row['m_value']
                  if(m_value != None):
                    m_value = float(m_value)
                    monthStats.addValue(m_value)
                    if(rawDataPoints != None):
                      rawDataPoints.write(",%f" %(m_value))
                
              monthStats.doCalculations()
              avg = monthStats.average
              if(avg == None):
                avg = -1.0
              stdDev = monthStats.stdDev
              if(stdDev == None):
                stdDev = -1.0
              popStdDev = monthStats.populationStdDev
              if(popStdDev == None):
                popStdDev = -1.0
                
              UpperPercentile = monthStats.getValueAtPercentile(90)
              if(UpperPercentile == None):
                UpperPercentile = -1.0
              min = monthStats.minVal
              if(min == None):
                min = -1.0
              max = monthStats.maxVal
              if(max == None):
                max = -1.0
                
              statsFile.write('%s,%s,%s,%f,%f,%f,%f,%f,%d\n'\
                              %(obsName,startDate,endDate,min,max,avg,stdDev,UpperPercentile,len(monthStats.items)))
              if(rawDataPoints != None):
                rawDataPoints.write("\n")
            else:
              i = 0
      statsFile.close()    
      if(rawDataPoints != None):
        rawDataPoints.close()