def getYearValue(year, bounds, clippedmask, dataType, operationsType):
    '''
    :param year: year to aggregate over
    :param bounds: spatial bounds used when reading from the datastore
    :param clippedmask: boolean mask of pixels inside the clipping geometry
    :param dataType: datatype number (int)
    :param operationsType: statistical operation to apply
    '''
    logger.debug("getYearValue year=" + str(year) + " datatype=" + str(dataType))
    mathoper = pMath.mathOperations(operationsType, 12, params.dataTypes[dataType]['fillValue'], None)
    try:
        store = dStore.datastorage(dataType, year)
        indexer = params.dataTypes[dataType]['indexer']
        fillValue = params.getFillValue(dataType)
        indexes = indexer.getIndexesBasedOnDate(1, 1, year, 31, 12, year)
        for i in indexes:
            array = store.getData(i, bounds=bounds)
            # Only feed in pixels that hold real data and fall inside the clip mask.
            mask = np.where((array != fillValue) & (clippedmask == True))
            if np.size(mask) > 0:
                mathoper.addData(array[mask])
            del mask
            del array
        store.close()
        value = mathoper.getOutput()
        mathoper.cleanup()
        return value
    except:
        return mathoper.getFillValue()

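# ---------------------------------------------------------------------------
# Illustrative sketch (hypothetical helper, not called anywhere in this
# module): the core selection logic of getYearValue above, reduced to plain
# numpy.  Pixels equal to the fill value or outside the clip mask are
# excluded before the statistic is computed.
def _example_masked_mean():
    import numpy as np
    fill_value = -9999.0
    data = np.array([[1.0, 2.0, fill_value],
                     [4.0, fill_value, 6.0]])
    clipped_mask = np.array([[True, True, True],
                             [False, True, True]])
    mask = np.where((data != fill_value) & (clipped_mask == True))
    return data[mask].mean()  # mean over [1.0, 2.0, 6.0] -> 3.0
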
def processYearByDirectory(dataType, year, inputdir, ldate):
    '''
    :param dataType:
    :param year:
    :param inputdir:
    :param ldate: only files dated after this datetime are ingested
    '''
    ### Process the incoming data
    dataupdated = False
    dataStore = dataS.datastorage(dataType, year, forWriting=True)
    indexer = params.dataTypes[dataType]['indexer']
    for filename in os.listdir(inputdir):
        # Filenames follow the "<product>.<yyyy>.<mmdd>..." convention; pull the date out.
        filesplit = filename.split('.')
        fyear = filesplit[1]
        fmonth = filesplit[2][:2]
        fday = filesplit[2][2:]
        fdatestring = fday + " " + fmonth + " " + fyear
        fdate = datetime.datetime.strptime(fdatestring, "%d %m %Y")
        if fdate > ldate:
            if filename.endswith(".tif") and os.stat(inputdir + "/" + filename).st_size > 0:
                dataupdated = True
                fileToProcess = inputdir + "/" + filename
                print "Processing " + fileToProcess
                directory, fileonly = os.path.split(fileToProcess)
                dictionary = dateutils.breakApartGEFSNewName(fileonly)  # name convention changed, update needed
                year = dictionary['year']
                month = dictionary['month']
                day = dictionary['day']
                sdate = "{0} {1} {2}".format(day, month, year)
                filedate = datetime.datetime.strptime(sdate, "%d %m %Y")
                ds = georead.openGeoTiff(fileToProcess)
                prj = ds.GetProjection()
                grid = ds.GetGeoTransform()
                img = georead.readBandFromFile(ds, 1)
                ds = None
                index = indexer.getIndexBasedOnDate(day, month, year)
                print "Index:", index
                # Only write if the slot still holds fill values (not yet ingested).
                c = np.array(dataStore.getData(index))
                if (c == -9999).all() == True:
                    dataStore.putData(index, img)
                else:
                    print fdate.strftime('%Y.%m.%d') + " data already in hdf"
        else:
            print "file date is before the last ingested date"
    dataStore.close()
    if dataupdated:
        dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'], prj, grid, year)

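# Illustrative sketch (hypothetical): the filename date parse used above,
# assuming the "<product>.<yyyy>.<mmdd>.tif" naming convention.
def _example_parse_filename_date(filename="gefs.2020.0315.tif"):
    import datetime
    parts = filename.split('.')
    fyear, fmonth, fday = parts[1], parts[2][:2], parts[2][2:]
    return datetime.datetime.strptime(fday + " " + fmonth + " " + fyear, "%d %m %Y")
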
def getArrayForYearMonthDay(year, month, day, dataType):
    '''
    :param year:
    :param month:
    :param day:
    :param dataType:
    '''
    try:
        store = dStore.datastorage(dataType, year)
        indexer = params.dataTypes[dataType]['indexer']
        index = indexer.getIndexBasedOnDate(day, month, year)
        array = store.getData(index)
        # Close the store before returning; the original returned first,
        # which made the close (and a second return) unreachable.
        store.close()
        return array
    except:
        return []

def averageDecadalData(dataType, year, indexList):
    print "Averaging Decadal Data"
    dataStore = dataS.datastorage(dataType, year, forWriting=True)
    i = 0
    try:
        while i < len(indexList) - 1:
            if np.issubdtype(type(indexList[i]), np.integer) != True and np.issubdtype(
                    type(indexList[i + 2]), np.integer) != True:
                first = np.array(indexList[i])
                second = np.array(indexList[i + 2])
                # Average the two bracketing decades.  The original left both
                # candidate formulas commented out, leaving 'averaged'
                # undefined; the element-wise mean is restored here.
                averaged = (first + second) / 2
                print "adding averaged data to index: " + str(i + 1)
                dataStore.putData(i + 1, averaged)
            # Advance unconditionally; incrementing only inside the if-branch
            # (as originally written) loops forever when the test fails.
            i = i + 2
    except Exception as e:
        print("there was an exception: " + str(e))
    dataStore.close()

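# Illustrative sketch (hypothetical): element-wise averaging of two decadal
# grids.  np.nanmean over axis=0 (the other formula the original had
# commented out) is a gap-aware alternative when missing data is stored as NaN.
def _example_average_two_grids():
    import numpy as np
    first = np.array([[1.0, np.nan], [3.0, 4.0]])
    second = np.array([[3.0, 2.0], [5.0, np.nan]])
    return np.nanmean(np.array([first, second]), axis=0)  # [[2., 2.], [4., 4.]]
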
def processYearByDirectory(dataType, year, inputdir):
    '''
    :param dataType:
    :param year:
    :param inputdir:
    '''
    ### Process the incoming data
    prj = None
    dataStore = dataS.datastorage(dataType, year, forWriting=True)
    indexer = params.dataTypes[dataType]['indexer']
    for filename in os.listdir(inputdir):
        if filename.endswith(".tif"):
            fileToProcess = inputdir + "/" + filename
            print "Processing " + fileToProcess
            directory, fileonly = os.path.split(fileToProcess)
            dictionary = dateutils.breakApartSmapName(fileonly)
            year = int(dictionary['year'])
            day = int(dictionary['day'])
            month = int(dictionary['month'])
            ds = georead.openGeoTiff(fileToProcess)
            prj = ds.GetProjection()
            grid = ds.GetGeoTransform()
            img = georead.readBandFromFile(ds, 1)
            ds = None
            index = indexer.getIndexBasedOnDate(day, month, year)
            print "Index:", index
            dataStore.putData(index, img)
    dataStore.close()
    # Guard against an empty input directory, in which case the loop never
    # set prj/grid and the unconditional write would raise a NameError.
    if prj is not None:
        dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'], prj, grid, year)

def _get_DataStore(self, dataTypeNumber, yearValue, forWriting):
    retDataStore = dStore.datastorage(dataTypeNumber, yearValue, forWriting)
    return retDataStore

def processYearByDirectory(dataType, year, inputdir):
    '''
    :param dataType:
    :param year:
    :param inputdir:
    '''
    ### Process the incoming data
    global dataName
    if dataType == 29:
        dataName = 'esi4week'
    else:
        dataName = 'esi12week'
    filePattern = None
    with open('/data/data/cserv/www/html/json/stats.json', 'r+') as f:
        data = json.load(f)
        theDate = filter(getESIDate, data['items'])[0]['Latest']
        # Build the latest ingested date as <yyyy><ddd> (year plus zero-padded
        # day-of-year).  The format string must use %m (month); the original
        # used %M, which parses minutes.  Note the hardcoded 2019 anchor: this
        # only yields a true day-of-year for 2019 data.
        filePattern = theDate.split(' ')[2] + str("%03d" % (
            (datetime.datetime.strptime(theDate, '%d %m %Y') -
             datetime.datetime(2019, 1, 1)).days + 1, ))
    dataStore = dataS.datastorage(dataType, year, forWriting=True)
    indexer = params.dataTypes[dataType]['indexer']
    for filename in os.listdir(inputdir):
        if filename.endswith(".tif") and int(getDatePattern(filename)) > int(filePattern):
            fileToProcess = inputdir + "/" + filename
            print "Processing " + fileToProcess
            directory, fileonly = os.path.split(fileToProcess)
            dictionary = dateutils.breakApartEsiName(fileonly)
            year = dictionary['year']
            month = dictionary['month']
            day = dictionary['day']
            sdate = "{0} {1} {2}".format(day, month, year)
            filedate = datetime.datetime.strptime(sdate, "%d %m %Y")
            ds = georead.openGeoTiff(fileToProcess)
            prj = ds.GetProjection()
            grid = ds.GetGeoTransform()
            img = georead.readBandFromFile(ds, 1)
            ds = None
            index = indexer.getIndexBasedOnDate(day, month, year)
            print "Index:", index
            try:
                changed = False
                with open('/data/data/cserv/www/html/json/stats.json', 'r+') as f:
                    data = json.load(f)
                    for item in data['items']:
                        if item['name'] == dataName:
                            ldatestring = item['Latest']
                            ldate = datetime.datetime.strptime(ldatestring, "%d %m %Y")
                            if ldate < filedate:
                                print("file date is later")
                                item['Latest'] = sdate
                                changed = True
                    if changed:
                        f.seek(0)  # reset file position to the beginning
                        json.dump(data, f, indent=4)
                        f.truncate()  # remove any remaining tail of the old file
            except Exception as e:
                print(e)
                pass
            dataStore.putData(index, img)
    dataStore.close()

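# Illustrative sketch (hypothetical): the <yyyy><ddd> pattern built above,
# with the day-of-year computed via strftime('%j') instead of subtracting a
# hardcoded anchor date, so it works for any year.
def _example_date_pattern(datestring="15 06 2019"):
    import datetime
    d = datetime.datetime.strptime(datestring, "%d %m %Y")
    return d.strftime("%Y") + d.strftime("%j")  # -> "2019166"
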
def processYearByDirectory(dataType, year, inputdir):
    '''
    :param dataType:
    :param year:
    :param inputdir:
    '''
    ### Process the incoming data
    prj = None
    indexer = params.dataTypes[dataType]['indexer']
    for filename in os.listdir(inputdir):
        if filename.endswith(".tif"):
            try:
                dataStore = dataS.datastorage(dataType, year, forWriting=True)
                fileToProcess = inputdir + "/" + filename
                print "Processing " + fileToProcess
                directory, fileonly = os.path.split(fileToProcess)
                dictionary = dateutils.breakApartemodisNameAdjust(fileonly, 3)
                year = dictionary['year']
                month = dictionary['month']
                day = dictionary['day']
                sdate = "{0} {1} {2}".format(day, month, year)
                filedate = datetime.datetime.strptime(sdate, "%d %m %Y")
                ds = georead.openGeoTiff(fileToProcess)
                prj = ds.GetProjection()
                grid = ds.GetGeoTransform()
                img = georead.readBandFromFile(ds, 1)
                # Some rasters arrive one pixel wider/taller than the expected
                # grid; trim the extra column/row when present.
                try:
                    xSize = params.dataTypes[dataType]['size'][0]
                    img = np.delete(img, (xSize), axis=1)
                    print("Accounting for pixel width differences.")
                except:
                    pass
                try:
                    ySize = params.dataTypes[dataType]['size'][1]
                    img = np.delete(img, (ySize), axis=0)
                    print("Accounting for pixel height differences.")
                except:
                    pass
                ### Convert stretched byte values to NDVI, per the FEWS.NET data document:
                # eMODIS NDVI data are stretched (mapped) linearly to byte values:
                #   [-1.0, 1.0] -> [0, 200]; values 201-255 are invalid.
                #   NDVI = (value - 100) / 100; example: (150 - 100) / 100 = 0.5 NDVI
                validmask = np.where(img <= 200)
                invalidmask = np.where((img > 200) | (img < 100))
                img = img.astype(np.float32)
                img[validmask] = (img[validmask] - 100) / 100.
                img[invalidmask] = params.dataTypes[dataType]['fillValue']
                ds = None
                index = indexer.getIndexBasedOnDate(day, month, year)
                print month, "/", day, "/", year, "--Index->", index
                try:
                    changed = False
                    with open('/data/data/cserv/www/html/json/stats.json', 'r+') as f:
                        data = json.load(f)
                        for item in data['items']:
                            if item['name'] == 'casndvi':
                                ldatestring = item['Latest']
                                ldate = datetime.datetime.strptime(ldatestring, "%d %m %Y")
                                if ldate < filedate:
                                    print("file date is later")
                                    item['Latest'] = sdate
                                    changed = True
                        if changed:
                            f.seek(0)  # reset file position to the beginning
                            json.dump(data, f, indent=4)
                            f.truncate()  # remove any remaining tail of the old file
                except Exception as e:
                    print(e)
                    pass
                dataStore.putData(index, img)
                img = None
                dataStore.close()
            except:
                print 'Failed adding tif'
                img = None
                dataStore.close()
    if prj is not None:
        dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'], prj, grid, year)

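# Illustrative sketch (hypothetical): the byte-to-NDVI unstretch above on a
# tiny array.  Stretched bytes in [0, 200] map linearly to NDVI [-1.0, 1.0];
# this module additionally treats everything below 100 (negative NDVI) and
# above 200 as fill.
def _example_unstretch_ndvi():
    import numpy as np
    fill_value = -9999.0
    img = np.array([50, 100, 150, 200, 255], dtype=np.uint8)
    valid = np.where(img <= 200)
    invalid = np.where((img > 200) | (img < 100))
    out = img.astype(np.float32)
    out[valid] = (out[valid] - 100) / 100.
    out[invalid] = fill_value  # overwrites the <100 values converted above
    return out  # [-9999., 0., 0.5, 1., -9999.]
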
def ingest_IMERG(startYYYYMMDD, endYYYYMMDD):
    # Set the Datatype number
    current_DataTypeNumber = 34  # Hardcoded until there are more IMERG types in here..

    # Instance of the IMERG data classes
    IMERG_DataClass = IDC.IMERG_Data()

    # Convert to dates
    dateFormat = "%Y%m%d"
    start_Date = datetime.datetime.strptime(startYYYYMMDD, dateFormat)
    end_Date = datetime.datetime.strptime(endYYYYMMDD, dateFormat)

    # Build the list of expected datasets, one per day in the range.
    dataset_Obj_List = []
    delta = end_Date - start_Date
    for i in range(delta.days + 1):
        currentDate = start_Date + datetime.timedelta(days=i)
        tifFileName = IMERG_DataClass.get_Expected_Tif_FileName(currentDate.year, currentDate.month, currentDate.day)
        obj_To_Append = {
            "Tif_File_Name": tifFileName,
            "year": currentDate.year,
            "month": currentDate.month,
            "day": currentDate.day
        }
        dataset_Obj_List.append(obj_To_Append)

    # Folder where TIF and TFW files end up.
    input_Dataset_Folder = params.dataTypes[current_DataTypeNumber]['inputDataLocation']

    # Other vars needed for the loop
    itemsCounter = 0
    ingest_Error_List = []
    capabilities_DateFormatString = "%Y_%m_%d"
    last_YYYY_MM_DD_Processed = None

    # Ingest specific stuff
    yearForHDF = int(startYYYYMMDD[0:4])  # Year for the HDF file
    dataStore = dataS.datastorage(current_DataTypeNumber, yearForHDF, forWriting=True)
    indexer = params.dataTypes[current_DataTypeNumber]['indexer']

    # Do the actual ingest.
    for currentObj in dataset_Obj_List:
        try:
            # Try to ingest the file; record an error if one occurs.
            fileName = currentObj['Tif_File_Name']
            fileToProcess = os.path.join(input_Dataset_Folder, fileName)
            # The product run suffix varies (03E/04A/04B); fall through the
            # variants until a file is found.
            if not os.path.isfile(fileToProcess):
                fileToProcess = fileToProcess.replace("03E", "04A")
            if not os.path.isfile(fileToProcess):
                fileToProcess = fileToProcess.replace("04A", "04B")
            print("-Processing File: " + str(fileToProcess))

            theYear = yearForHDF  # currentObj['year']
            theMonth = currentObj['month']
            theDay = currentObj['day']

            # Open / read the file
            ds = georead.openGeoTiff_WithUpdateFlag(fileToProcess)

            # Set a projection and transform (the IMERG data does not come with one already..)
            ds.SetProjection(IMERG_DataClass.get_DefaultProjection_String())
            ds.SetGeoTransform(IMERG_DataClass.get_DefaultGeoTransform_Obj())

            # Get the values to save (just like in all the other ingest procedures).
            prj = ds.GetProjection()
            grid = ds.GetGeoTransform()

            # Index it.
            img = georead.readBandFromFile(ds, 1)
            ds = None
            index = indexer.getIndexBasedOnDate(theDay, theMonth, theYear)
            dataStore.putData(index, img)
            last_YYYY_MM_DD_Processed = str(theYear) + "_" + str("%02d" % theMonth) + "_" + str("%02d" % theDay)
            itemsCounter += 1
        except:
            # Record the error and move on to the next file.
            e = sys.exc_info()[0]
            errorStr = "-ERROR Ingesting File: " + str(fileName) + " System Error Message: " + str(e)
            print(str(errorStr))
            ingest_Error_List.append(errorStr)

    # Close and save the data
    dataStore.close()

    if itemsCounter > 0:
        dataS.writeSpatialInformation(params.dataTypes[current_DataTypeNumber]['directory'], prj, grid, yearForHDF)
        print("Finished processing, " + str(itemsCounter) + ", data items for year: " + str(yearForHDF))

        # Update the capabilities
        try:
            print("-TODO: Check existing capabilities and overwrite only some parts rather than overwriting with the last option. Right now the last item ingested has its date set as the END date for the capabilities range, so a reingest of a small subset in the middle of the data will surface this bug; budget about a day to fix it.")
            capabilities_Info = {
                "name": params.dataTypes[current_DataTypeNumber]['name'],
                "description": params.dataTypes[current_DataTypeNumber]['description'],
                "size": params.dataTypes[current_DataTypeNumber]['size'],
                "fillValue": params.dataTypes[current_DataTypeNumber]['fillValue'],
                "data_category": params.dataTypes[current_DataTypeNumber]['data_category'],
                "projection": prj,
                "grid": grid,
                # The start and end of the date range.
                "startDateTime": "2015_03_08",
                "endDateTime": last_YYYY_MM_DD_Processed,
                "date_FormatString_For_ForecastRange": capabilities_DateFormatString
            }
            # Write the capabilities info to the bddb
            theJSONString = json.dumps(capabilities_Info)
            # Create a connection to the DB, set the new values, close the connection
            conn = bdp.BDDbConnector_Capabilities()
            conn.set_DataType_Capabilities_JSON(current_DataTypeNumber, theJSONString)
            conn.close()
            print("-API Datatype Capabilities for datatype number: " + str(current_DataTypeNumber) + " written to local DB as: " + str(theJSONString))
        except:
            print("-WARNING: Data was ingested on this run AND there was an issue updating the API Capabilities local DB")
    else:
        print("No Items found for year: " + str(yearForHDF))

    print(str(len(ingest_Error_List)) + " errors associated with ingest items.")
    print("")
    print("Output of per-item Error Log: " + str(ingest_Error_List))
    print("")

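# Illustrative sketch (hypothetical): building the per-day expected-dataset
# list used by ingest_IMERG above, without the product-specific filename.
def _example_build_daily_list(start="20200301", end="20200303"):
    import datetime
    fmt = "%Y%m%d"
    start_date = datetime.datetime.strptime(start, fmt)
    end_date = datetime.datetime.strptime(end, fmt)
    items = []
    for i in range((end_date - start_date).days + 1):
        d = start_date + datetime.timedelta(days=i)
        items.append({"year": d.year, "month": d.month, "day": d.day})
    return items  # one entry per day, inclusive of both endpoints
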
def getDayValue(year, month, day, bounds, clippedmask, dataType, operationsType, polygon_Str_ToPass, uid):
    '''
    :param year:
    :param month:
    :param day:
    :param bounds:
    :param clippedmask:
    :param dataType: This is actually the datatype number (int)
    :param operationsType:
    :param polygon_Str_ToPass: GeoJSON string for the clipping geometry
    :param uid: unique ID of the whole job (used for the scratch folder)
    '''
    # Single item in one dimension; the index for the day assumes 31 days in every month.
    logger.debug("getDayValue year=" + str(year) + " month=" + str(month) + " day=" + str(day) + " datatype=" + str(dataType))

    # KS Refactor 2015 // This is where the code is intercepted to add the new
    # 'download' operation at the worker thread level.
    if params.parameters[operationsType][1] == 'download':
        onErrorReturnValue = 0  # 0 signals failure to the caller
        try:
            # Param checking (compared to the test controller function in HDFDataToFile)
            theDataTypeNumber = dataType
            size = params.getGridDimension(int(theDataTypeNumber))
            geotransform, wkt = rp.getSpatialReference(int(theDataTypeNumber))
            theBounds = bounds
            geometry = geoutils.decodeGeoJSON(polygon_Str_ToPass)
            theYear = year
            theMonth = month
            theDay = day

            # Worker section
            theStore = dStore.datastorage(theDataTypeNumber, theYear)
            theIndexer = params.dataTypes[theDataTypeNumber]['indexer']
            theFillValue = params.getFillValue(theDataTypeNumber)
            theIndex = theIndexer.getIndexBasedOnDate(theDay, theMonth, theYear)

            hdf_Data_Array = None
            try:
                hdf_Data_Array = theStore.getData(theIndex, bounds=theBounds)
            except:
                firstErrorMessage = str(sys.exc_info())
                logger.debug("DataCalculator: Download Job ERROR getting data from H5 to hdf_Data_Array: We are inside 2 try/except blocks. firstErrorMessage: " + str(firstErrorMessage) + ", trying a workaround before bailing out!")
                # Workaround for a buggy h5py bounds selection: read the data
                # in two halves split at the breakpoint, then stitch the
                # halves back together side by side.
                try:
                    breakPoint = 0
                    theBounds_Part1 = (theBounds[0], (breakPoint - 1), theBounds[2], theBounds[3])
                    theBounds_Part2 = (breakPoint, theBounds[1], theBounds[2], theBounds[3])
                    hdf_Data_Array_Part1 = theStore.getData(theIndex, bounds=theBounds_Part1)
                    hdf_Data_Array_Part2 = theStore.getData(theIndex, bounds=theBounds_Part2)
                    # Equivalent to the original element-by-element copy loops.
                    hdf_Data_Array = np.hstack((hdf_Data_Array_Part1, hdf_Data_Array_Part2)).astype(np.float32)
                except:
                    # If this error keeps happening, read HDFDataToFile line 138 for more detailed notes on this issue.
                    logger.debug("DataCalculator: Download Job ERROR getting data from H5 to hdf_Data_Array: We are inside 2 try/except blocks, and the second one failed.. firstErrorMessage: " + str(firstErrorMessage) + " System Error Message: " + str(sys.exc_info()))
                    return onErrorReturnValue

            # Points processing from the geometry value
            thePoints = geometry
            theLats = []
            theLongs = []
            for p in range(thePoints.GetPointCount()):
                theLats.append(thePoints.GetY(p))
                theLongs.append(thePoints.GetX(p))

            # Min longitude and max latitude give the top-left corner.
            minLong = min(theLongs)
            maxLat = max(theLats)

            # Earlier attempts to adjust negative values (+360 on longitude,
            # abs()+/-90 on latitude, and a half-pixel shift for the
            # 'bug 3 pixels off by half a degree') all misplaced the output;
            # the values now pass through unchanged.
            adjusted_Min_Long = minLong
            adjusted_Max_Lat = maxLat

            # Outfile transform x,y positions set using the adjusted min long and max lat
            outTransform_xPos = adjusted_Min_Long
            outTransform_yPos = adjusted_Max_Lat

            noData_Value = theFillValue
            bandName = 1
            fullDatset_GeoTransform = geotransform
            outFullGeoTransform = (outTransform_xPos, fullDatset_GeoTransform[1], fullDatset_GeoTransform[2],
                                   outTransform_yPos, fullDatset_GeoTransform[4], fullDatset_GeoTransform[5])
            fullDataset_Projection = wkt
            uniqueID = uid  # Entire job ID

            # Process the filename
            outFileName = extractTif.get_Tif_FileOutName(theDataTypeNumber, theYear, theMonth, theDay)
            outFileFolder = params.zipFile_ScratchWorkspace_Path + str(uid) + "/"
            outFileFullPath = outFileFolder + outFileName

            # Output file size
            out_X_Size = hdf_Data_Array.shape[1]
            out_Y_Size = hdf_Data_Array.shape[0]

            # Get the GDAL driver and create a blank output file
            theDriverFormat = "GTiff"
            theDriver = gdal.GetDriverByName(theDriverFormat)
            outDS = theDriver.Create(outFileFullPath, out_X_Size, out_Y_Size, 1, GDT_Float32)

            # Write the data array to the band, set the NoData value, and flush
            # the cache (this is the step that writes data to the output file).
            outDataArray = hdf_Data_Array
            outBand = outDS.GetRasterBand(bandName)
            outBand.WriteArray(outDataArray, 0, 0)
            outBand.SetNoDataValue(noData_Value)
            outBand.FlushCache()

            # Set the projection and transform, then close the dataset (very important!)
            outDS.SetGeoTransform(outFullGeoTransform)
            outDS.SetProjection(fullDataset_Projection)
            outDS = None

            # A tif file now exists in the zipfile scratch area (one per run);
            # return 1 to signal that this tif was created.
            return 1
        except:
            logger.debug("DataCalculator: Download Job ERROR: Not sure what went wrong... System Error Message: " + str(sys.exc_info()))
            return onErrorReturnValue
    else:
        # Normal statistical operations
        mathoper = pMath.mathOperations(operationsType, 1, params.dataTypes[dataType]['fillValue'], None)
        try:
            store = dStore.datastorage(dataType, year)
            indexer = params.dataTypes[dataType]['indexer']
            fillValue = params.getFillValue(dataType)
            index = indexer.getIndexBasedOnDate(day, month, year)

            # The same two-part read workaround used by the download branch above.
            array_H5Data = None
            try:
                array_H5Data = store.getData(index, bounds=bounds)
            except:
                firstErrorMessage = str(sys.exc_info())
                logger.debug("DataCalculator: Statistics Job ERROR getting data from H5 to array_H5Data: We are inside 2 try/except blocks. firstErrorMessage: " + str(firstErrorMessage) + ", trying a workaround before bailing out!")
                try:
                    theBounds = bounds
                    theStore = store
                    theIndex = index
                    breakPoint = 0
                    theBounds_Part1 = (theBounds[0], (breakPoint - 1), theBounds[2], theBounds[3])
                    theBounds_Part2 = (breakPoint, theBounds[1], theBounds[2], theBounds[3])
                    hdf_Data_Array_Part1 = theStore.getData(theIndex, bounds=theBounds_Part1)
                    hdf_Data_Array_Part2 = theStore.getData(theIndex, bounds=theBounds_Part2)
                    array_H5Data = np.hstack((hdf_Data_Array_Part1, hdf_Data_Array_Part2)).astype(np.float32)
                    logger.debug("DataCalculator stitchedData_Array has been built.")
                except:
                    logger.debug("DataCalculator: Statistics Job ERROR getting data from H5 to array_H5Data: We are inside 2 try/except blocks, and the second one failed.. The code will break shortly... firstErrorMessage: " + str(firstErrorMessage) + " System Error Message: " + str(sys.exc_info()))

            logger.debug("DataCalculator.getDayValue : Value of 'index': " + str(index))

            # Something in here breaks on climate datatypes found in the
            # southern hemisphere: the clipped mask can come back empty.
            mask = None
            try:
                mask = np.where((array_H5Data != fillValue) & (clippedmask == True))
                # If the mask is empty, raise and fall through to the
                # whole-array fallback below.
                if len(mask[0]) == 0:
                    logger.debug("DataCalculator.getDayValue : len(mask[0]) should NOT be 0. Raising the exception...")
                    raise Exception("empty clipped mask")
                mathoper.addData(array_H5Data[mask])
            except:
                logger.debug("DataCalculator Alert F.except.debug: Something went wrong with the normal process..")
                # Build a mask that selects every cell of the data array
                # (row-index array, column-index array); equivalent to the
                # original hand-rolled index loops.
                numOf_H5_Rows = array_H5Data.shape[0]
                numOf_H5_Cols = array_H5Data.shape[1]
                rowIndexes, colIndexes = np.indices((numOf_H5_Rows, numOf_H5_Cols))
                fakeMask = (rowIndexes.ravel(), colIndexes.ravel())
                mathoper.addData(array_H5Data[fakeMask])

            del mask
            del array_H5Data
            store.close()
            value = mathoper.getOutput()
            mathoper.cleanup()
            return value
        except:
            e = sys.exc_info()[0]
            logger.debug("DataCalculator.getDayValue : returning fill value.. 'mathoper.getFillValue()': " + str(mathoper.getFillValue()) + " System Error Message: " + str(e))
            return mathoper.getFillValue()

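# Illustrative sketch (hypothetical): the two-part read-and-stitch workaround
# and the full-array index mask from getDayValue above, in isolation.
def _example_stitch_and_full_mask():
    import numpy as np
    part1 = np.array([[1.0, 2.0], [5.0, 6.0]])
    part2 = np.array([[3.0, 4.0], [7.0, 8.0]])
    stitched = np.hstack((part1, part2))  # shape (2, 4): halves joined side by side
    rows, cols = np.indices(stitched.shape)
    full_mask = (rows.ravel(), cols.ravel())  # (row indexes, column indexes) for every cell
    return stitched[full_mask]  # [1. 2. 3. 4. 5. 6. 7. 8.]
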
def ingestSubProcess_Year(current_DataTypeNumber, year):
    itemsCounter = 0
    inputYear = str(year)
    processedFileNames = []
    skippedFileNames = []
    prj = None
    grid = None
    dataStore = dataS.datastorage(current_DataTypeNumber, year, forWriting=True)
    indexer = params.dataTypes[current_DataTypeNumber]['indexer']
    inputdir = params.dataTypes[current_DataTypeNumber]['inputDataLocation']
    print("inputdir: " + inputdir)

    # Iterate through each file and do the processing
    for filename in os.listdir(inputdir):
        if filename.endswith(".tif"):
            fileToProcess = os.path.join(inputdir, filename)
            directory, fileonly = os.path.split(fileToProcess)
            # Get the year, month and day the file represents
            dictionary = get_YearMonthDay_Obj_From_ClimateChange_FileName(fileonly)  # dateutils.breakApartChripsName(fileonly)
            # We only want items for the current year
            compareYear = str(dictionary['year'])
            if compareYear == inputYear:
                year = dictionary['year']
                month = dictionary['month']
                day = dictionary['day']
                # Open / read the file
                ds = georead.openGeoTiff(fileToProcess)
                prj = ds.GetProjection()
                grid = ds.GetGeoTransform()
                # Index it.
                img = georead.readBandFromFile(ds, 1)
                ds = None
                index = indexer.getIndexBasedOnDate(day, month, year)
                dataStore.putData(index, img)
                processedFileNames.append(fileonly)
                itemsCounter += 1
            else:
                skippedFileNames.append(fileonly)

    # Close and save the data
    dataStore.close()
    if itemsCounter > 0:
        try:
            dataS.writeSpatialInformation(params.dataTypes[current_DataTypeNumber]['directory'], prj, grid, year)
        except Exception as e:
            print("Here's the error: " + str(e))
    print("Finished processing, " + str(itemsCounter) + ", data items for year: " + str(year))
    # The caller needs the projection and grid strings for the capabilities output.
    retObject = {"projection": prj, "grid": grid}
    return retObject

def processYearByDirectory(dataType, year, inputdir):
    '''
    :param dataType:
    :param year:
    :param inputdir:
    '''
    ### Process the incoming data
    prj = None
    dataStore = dataS.datastorage(dataType, year, forWriting=True)
    indexer = params.dataTypes[dataType]['indexer']
    for filename in os.listdir(inputdir):
        if filename.endswith(".tif") and "chirps" in filename:
            fileToProcess = inputdir + "/" + filename
            print "Processing " + fileToProcess
            directory, fileonly = os.path.split(fileToProcess)
            dictionary = dateutils.breakApartChripsName(fileonly)
            year = dictionary['year']
            month = dictionary['month']
            day = dictionary['day']
            sdate = "{0} {1} {2}".format(day, month, year)
            filedate = datetime.datetime.strptime(sdate, "%d %m %Y")
            ds = georead.openGeoTiff(fileToProcess)
            prj = ds.GetProjection()
            grid = ds.GetGeoTransform()
            time.sleep(1)
            img = georead.readBandFromFile(ds, 1)
            index = indexer.getIndexBasedOnDate(day, month, year)
            print "Index:", index
            try:
                changed = False
                with open('/data/data/cserv/www/html/json/stats.json', 'r+') as f:
                    data = json.load(f)
                    for item in data['items']:
                        if item['name'] == 'chirps':
                            ldatestring = item['Latest']
                            ldate = datetime.datetime.strptime(ldatestring, "%d %m %Y")
                            if ldate < filedate:
                                item['Latest'] = sdate
                                changed = True
                    if changed:
                        f.seek(0)  # reset file position to the beginning
                        json.dump(data, f, indent=4)
                        f.truncate()  # remove any remaining tail of the old file
            except Exception as e:
                # str(e) is required here; concatenating the exception object
                # itself (as originally written) raises a TypeError.
                print("******************" + str(e) + "****************************")
                pass
            time.sleep(1)
            dataStore.putData(index, img)
            time.sleep(1)
            ds = None
    dataStore.close()
    if prj is not None:
        dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'], prj, grid, year)

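# Illustrative sketch (hypothetical path and data): the read-modify-write
# pattern used on stats.json throughout this module.  seek(0) rewinds before
# dumping and truncate() drops any leftover tail when the new JSON is shorter
# than the old content.
def _example_update_json_in_place(path="/tmp/example_stats.json"):
    import json
    with open(path, 'w') as f:
        json.dump({"items": [{"name": "chirps", "Latest": "01 01 2020"}]}, f)
    with open(path, 'r+') as f:
        data = json.load(f)
        for item in data['items']:
            if item['name'] == 'chirps':
                item['Latest'] = "02 01 2020"
        f.seek(0)        # rewind to the beginning
        json.dump(data, f, indent=4)
        f.truncate()     # remove any remaining tail of the old content
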
def ingest_CHIRPSMonthly(startYYYYMM, endYYYYMM):
    # Set the Datatype number
    current_DataTypeNumber = 28  # Hardcoded until there is a better way to get this information (maybe params DB?)

    # Convert to dates
    dateFormat = "%Y%m"
    start_Date = datetime.datetime.strptime(startYYYYMM, dateFormat)
    end_Date = datetime.datetime.strptime(endYYYYMM, dateFormat)

    # Build the list of expected datasets, one per month in the range
    # (end_Date is pushed out one month so the while loop below includes it).
    dataset_Obj_List = []
    end_Date = add_months(end_Date, 1)
    tempDate = start_Date
    while (end_Date - tempDate).days > 0:
        currentDate = tempDate
        # Expected filename; something like this should be part of a data-classes object.
        tifFileName = "chirps-v2.0." + str(currentDate.year) + "." + str("%02d" % currentDate.month) + ".tif"
        obj_To_Append = {
            "Tif_File_Name": tifFileName,
            "year": currentDate.year,
            "month": currentDate.month,
            "day": currentDate.day
        }
        dataset_Obj_List.append(obj_To_Append)
        # Increment and set the new temp value for the while loop
        tempDate = add_months(tempDate, 1)

    # Folder where TIF files end up after download.
    input_Dataset_Folder = params.dataTypes[current_DataTypeNumber]['inputDataLocation']

    # Other vars needed for the loop
    itemsCounter = 0
    ingest_Error_List = []
    capabilities_DateFormatString = "%Y_%m"
    last_YYYY_MM_Processed = None

    # Ingest specific stuff
    yearForHDF = int(startYYYYMM[0:4])  # Year for the HDF file
    dataStore = dataS.datastorage(current_DataTypeNumber, yearForHDF, forWriting=True)
    indexer = params.dataTypes[current_DataTypeNumber]['indexer']

    # Do the actual ingest.
    for currentObj in dataset_Obj_List:
        try:
            # Try to ingest the file; record an error if one occurs.
            fileName = currentObj['Tif_File_Name']
            fileToProcess = os.path.join(input_Dataset_Folder, fileName)
            print("-Processing File: " + str(fileToProcess))

            theYear = yearForHDF  # currentObj['year']
            theMonth = currentObj['month']
            theDay = 1  # Monthly datasets use the first day of each month.

            # Open / read the file.  The original slept for an undefined 't'
            # seconds here (a NameError); a fixed one-second pause is assumed.
            ds = georead.openGeoTiff(fileToProcess)
            time.sleep(1)

            # If the dataset format does not come with a correct projection
            # and transform, this is where to override them.

            # Get the values to save (just like in all the other ingest procedures).
            prj = ds.GetProjection()
            grid = ds.GetGeoTransform()

            # Index it.
            img = georead.readBandFromFile(ds, 1)
            ds = None
            index = indexer.getIndexBasedOnDate(theDay, theMonth, theYear)
            dataStore.putData(index, img)
            last_YYYY_MM_Processed = str(theYear) + "_" + str("%02d" % theMonth)
            itemsCounter += 1
        except:
            # Record the error and move on to the next file.
            e = sys.exc_info()[0]
            errorStr = "-ERROR Ingesting File: " + str(fileName) + " System Error Message: " + str(e)
            print(str(errorStr))
            ingest_Error_List.append(errorStr)

    # Close and save the data
    dataStore.close()

    if itemsCounter > 0:
        dataS.writeSpatialInformation(params.dataTypes[current_DataTypeNumber]['directory'], prj, grid, yearForHDF)
        print("Finished processing, " + str(itemsCounter) + ", data items for year: " + str(yearForHDF))

        # Update the capabilities
        try:
            print("-TODO: Check existing capabilities and overwrite only some parts rather than overwriting with the last option. Right now the last item ingested has its date set as the END date for the capabilities range, so a reingest of a small subset in the middle of the data will surface this bug; budget about a day to fix it.")
            capabilities_Info = {
                "name": params.dataTypes[current_DataTypeNumber]['name'],
                "description": params.dataTypes[current_DataTypeNumber]['description'],
                "size": params.dataTypes[current_DataTypeNumber]['size'],
                "fillValue": params.dataTypes[current_DataTypeNumber]['fillValue'],
                "data_category": params.dataTypes[current_DataTypeNumber]['data_category'],
                "projection": prj,
                "grid": grid,
                # The start and end of the date range.
                "startDateTime": "1985_01",
                "endDateTime": last_YYYY_MM_Processed,
                "date_FormatString_For_ForecastRange": capabilities_DateFormatString
            }
            # Write the capabilities info to the bddb
            theJSONString = json.dumps(capabilities_Info)
            # Create a connection to the DB, set the new values, close the connection
            conn = bdp.BDDbConnector_Capabilities()
            conn.set_DataType_Capabilities_JSON(current_DataTypeNumber, theJSONString)
            conn.close()
            print("-API Datatype Capabilities for datatype number: " + str(current_DataTypeNumber) + " written to local DB as: " + str(theJSONString))
        except:
            print("-WARNING: Data was ingested on this run AND there was an issue updating the API Capabilities local DB")
    else:
        print("No Items found for year: " + str(yearForHDF))

    print(str(len(ingest_Error_List)) + " errors associated with ingest items.")
    print("")
    print("Output of per-item Error Log: " + str(ingest_Error_List))
    print("")

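# Illustrative sketch (hypothetical): a minimal add_months, assuming the real
# one lives elsewhere in this codebase, plus the month-by-month walk used by
# ingest_CHIRPSMonthly above.
def _example_iterate_months(start="202001", end="202003"):
    import datetime
    import calendar

    def add_months(sourcedate, months):
        month = sourcedate.month - 1 + months
        year = sourcedate.year + month // 12
        month = month % 12 + 1
        day = min(sourcedate.day, calendar.monthrange(year, month)[1])
        return datetime.datetime(year, month, day)

    fmt = "%Y%m"
    temp = datetime.datetime.strptime(start, fmt)
    stop = add_months(datetime.datetime.strptime(end, fmt), 1)  # make the range inclusive
    months = []
    while (stop - temp).days > 0:
        months.append((temp.year, temp.month))
        temp = add_months(temp, 1)
    return months  # [(2020, 1), (2020, 2), (2020, 3)]
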
def processYearByDirectory(dataType, year, inputdir, nlastdate):
    '''
    :param dataType:
    :param year:
    :param inputdir:
    :param nlastdate: only files dated after this datetime are ingested
    '''
    ### Process the incoming data
    print inputdir
    dataupdated = False
    dataStore = dataS.datastorage(dataType, year, forWriting=True)
    indexer = params.dataTypes[dataType]['indexer']
    for filename in os.listdir(inputdir):
        # Filenames follow the "<product>.<yyyy>.<mmdd>..." convention; pull the date out.
        filesplit = filename.split('.')
        fyear = filesplit[1]
        fmonth = filesplit[2][:2]
        fday = filesplit[2][2:]
        fdatestring = fday + " " + fmonth + " " + fyear
        fdate = datetime.datetime.strptime(fdatestring, "%d %m %Y")
        if fdate > nlastdate:
            if filename.endswith(".tif") and os.stat(inputdir + "/" + filename).st_size > 0:
                dataupdated = True
                fileToProcess = inputdir + "/" + filename
                print "Processing " + fileToProcess
                directory, fileonly = os.path.split(fileToProcess)
                dictionary = dateutils.breakApartGEFSNewName(fileonly)  # name convention changed, update needed
                year = dictionary['year']
                month = dictionary['month']
                day = dictionary['day']
                sdate = "{0} {1} {2}".format(day, month, year)
                filedate = datetime.datetime.strptime(sdate, "%d %m %Y")
                ds = georead.openGeoTiff(fileToProcess)
                prj = ds.GetProjection()
                grid = ds.GetGeoTransform()
                img = georead.readBandFromFile(ds, 1)
                ds = None
                index = indexer.getIndexBasedOnDate(day, month, year)
                print "Index:", index
                try:
                    changed = False
                    with open('/data/data/cserv/www/html/json/stats.json', 'r+') as f:
                        data = json.load(f)
                        for item in data['items']:
                            if item['name'] == 'gefsprecip':
                                ldatestring = item['Latest']
                                ldate = datetime.datetime.strptime(ldatestring, "%d %m %Y")
                                if ldate < filedate:
                                    print("file date is later")
                                    item['Latest'] = sdate
                                    changed = True
                        if changed:
                            f.seek(0)  # reset file position to the beginning
                            json.dump(data, f, indent=4)
                            f.truncate()  # remove any remaining tail of the old file
                except Exception as e:
                    print(e)
                    pass
                dataStore.putData(index, img)
    dataStore.close()
    if dataupdated:
        dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'], prj, grid, year)

def processDataStarting(yyyy, mm, dd):
    dataType = 0
    indexer = params.dataTypes[dataType]['indexer']
    inputdir = params.dataTypes[0]['inputDataLocation'] + yyyy  # will need to update this if year changes
    dataStore = dataS.datastorage(dataType, int(yyyy), forWriting=True)  # will need to update this if year changes
    ldatestring = dd + " " + mm + " " + yyyy
    ldate = datetime.datetime.strptime(ldatestring, "%d %m %Y")
    date = ldate + datetime.timedelta(days=1)
    # Walk forward one day at a time until today, ingesting any file that exists.
    while date.date() < datetime.datetime.now().date():
        fileToProcess = inputdir + "/chirp." + date.strftime('%Y.%m.%d') + ".tif"
        if os.path.exists(fileToProcess) and os.path.getsize(fileToProcess) > 0:
            print "file exists, ingest started on: " + fileToProcess
            directory, fileonly = os.path.split(fileToProcess)
            dictionary = dateutils.breakApartChripsName(fileonly)
            year = dictionary['year']
            month = dictionary['month']
            day = dictionary['day']
            sdate = "{0} {1} {2}".format(day, month, year)
            filedate = datetime.datetime.strptime(sdate, "%d %m %Y")
            ds = georead.openGeoTiff(fileToProcess)
            prj = ds.GetProjection()
            grid = ds.GetGeoTransform()
            img = georead.readBandFromFile(ds, 1)
            ds = None
            index = indexer.getIndexBasedOnDate(date.day, date.month, date.year)
            print "Index:", index
            # Only write if the slot still holds fill values (not yet ingested).
            c = np.array(dataStore.getData(index))
            if (c == -9999).all() == True:
                dataStore.putData(index, img)
                print date.strftime('%Y.%m.%d') + " data added to hdf"
                try:
                    changed = False
                    with open('/data/data/cserv/www/html/json/stats.json', 'r+') as f:
                        data = json.load(f)
                        for item in data['items']:
                            if item['name'] == 'chirp':
                                # Compare against the current loop date rather
                                # than the stored 'Latest' string.
                                ldate = date
                                if ldate < filedate:
                                    item['Latest'] = sdate
                                    changed = True
                        if changed:
                            f.seek(0)  # reset file position to the beginning
                            json.dump(data, f, indent=4)
                            f.truncate()  # remove any remaining tail of the old file
                except Exception as e:
                    print(e)
                    pass
            else:
                print date.strftime('%Y.%m.%d') + " data already in hdf"
        else:
            print "nothing to ingest"
        date = date + datetime.timedelta(days=1)
    dataStore.close()

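# Illustrative sketch (hypothetical): the "already ingested?" test above.  A
# slot in the HDF store is considered empty when every pixel still equals the
# -9999 fill value.
def _example_slot_is_empty():
    import numpy as np
    empty_slot = np.full((2, 3), -9999)
    written_slot = np.array([[-9999, 5, -9999], [1, 2, 3]])
    return (empty_slot == -9999).all(), (written_slot == -9999).all()  # (True, False)
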