def processYearByDirectory(dataType,year, inputdir, ldate): ''' :param dataType: :param year: :param inputdir: ''' ###Process the incoming data dataupdated = False dataStore = dataS.datastorage(dataType, year, forWriting=True) indexer = params.dataTypes[dataType]['indexer'] for filename in os.listdir(inputdir): filesplit = filename.split('.') fyear = filesplit[1] fmonth = filesplit[2][:2] fday = filesplit[2][2:] fdatestring = fday + " " + fmonth + " " + fyear fdate = datetime.datetime.strptime(fdatestring, "%d %m %Y") if fdate > ldate: if filename.endswith(".tif") and os.stat(inputdir+"/"+filename).st_size > 0: dataupdated = True fileToProcess = inputdir+"/"+filename print "Processing "+fileToProcess directory, fileonly = os.path.split(fileToProcess) dictionary = dateutils.breakApartGEFSNewName(fileonly) #name convention changed, update needed year = dictionary['year'] month = dictionary['month'] day = dictionary['day'] sdate = "{0} {1} {2}".format(day, month, year) filedate = datetime.datetime.strptime(sdate, "%d %m %Y") ds = georead.openGeoTiff(fileToProcess) prj=ds.GetProjection() grid = ds.GetGeoTransform() # day = decad * 10 # if month == int(2) and day == int(30): # day = 28 img = georead.readBandFromFile(ds, 1) ds = None index = indexer.getIndexBasedOnDate(day,month,year) print "Index:",index c = np.array(dataStore.getData(index)) if(c==-9999).all() == True: dataStore.putData(index, img) else: print fdate.strftime('%Y.%m.%d') + " data already in hdf" else: print "file date b4 late date" dataStore.close() if dataupdated: dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'],prj,grid,year)
def processYearByDirectory(dataType, year, inputdir): ''' :param dataType: :param year: :param inputdir: ''' ###Process the incoming data dataStore = dataS.datastorage(dataType, year, forWriting=True) indexer = params.dataTypes[dataType]['indexer'] for filename in os.listdir(inputdir): if filename.endswith(".tif"): fileToProcess = inputdir + "/" + filename print "Processing " + fileToProcess directory, fileonly = os.path.split(fileToProcess) dictionary = dateutils.breakApartSmapName(fileonly) year = int(dictionary['year']) day = int(dictionary['day']) month = int(dictionary['month']) ds = georead.openGeoTiff(fileToProcess) prj = ds.GetProjection() grid = ds.GetGeoTransform() img = georead.readBandFromFile(ds, 1) ds = None index = indexer.getIndexBasedOnDate(day, month, year) print "Index:", index dataStore.putData(index, img) dataStore.close() dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'], prj, grid, year)
def processYearByDirectory(dataType,year, inputdir): ''' :param dataType: :param year: :param inputdir: ''' ###Process the incoming data prj= None #dataStore = dataS.datastorage(dataType, year, forWriting=True) indexer = params.dataTypes[dataType]['indexer'] for filename in os.listdir(inputdir): if filename.endswith(".tif"): try: dataStore = dataS.datastorage(dataType, year, forWriting=True) fileToProcess = inputdir+"/"+filename print "Processing "+fileToProcess directory, fileonly = os.path.split(fileToProcess) dictionary = dateutils.breakApartemodisNameAdjust(fileonly, 3) year = dictionary['year'] month = dictionary['month'] day = dictionary['day'] sdate = "{0} {1} {2}".format(day, month, year) filedate = datetime.datetime.strptime(sdate, "%d %m %Y") ds = georead.openGeoTiff(fileToProcess) prj=ds.GetProjection() grid = ds.GetGeoTransform() img = georead.readBandFromFile(ds, 1) try: xSize = params.dataTypes[dataType]['size'][0] img = np.delete(img, (xSize), axis=1) print("Accounting for pixel width differences.") except: pass try: ySize = params.dataTypes[dataType]['size'][1] img = np.delete(img, (ySize), axis=0) print("Accounting for pixel height differences.") except: pass ###Manipulate the data as based on FEWS.NET data document to get NDVI from data. #eMODIS NDVI data are stretched (mapped) linearly (to byte values) as follows: #[-1.0, 1.0] -> [0, 200] - Invalid Values: 201 - 255 #NDVI = (value - 100) / 100; example: [ (150 - 100) / 100 = 0.5 NDVI ] #print np.max(img) validmask = np.where(img<=200) invalidmask = np.where((img>200) | (img<100)) #print "Max during:",np.max(img[validmask]) img = img.astype(np.float32) img[validmask] = (img[validmask] - 100)/100. 
img[invalidmask] = img[invalidmask]*0+params.dataTypes[dataType]['fillValue'] #print np.max(img) ds = None index = indexer.getIndexBasedOnDate(day,month,year) print month,"/",day,"/",year,"--Index->",index #print "Index:",index #print "Index:",index try: changed = False with open('/data/data/cserv/www/html/json/stats.json', 'r+') as f: data = json.load(f) for item in data['items']: if(item['name'] == 'casndvi'): ldatestring = item['Latest'] ldate = datetime.datetime.strptime(ldatestring, "%d %m %Y") if ldate < filedate: print("file date is later") item['Latest'] = sdate changed = True if changed: f.seek(0) # <--- should reset file position to the beginning. json.dump(data, f, indent=4) f.truncate() # remove remaining part except Exception as e: print(e) pass dataStore.putData(index, img) img = None dataStore.close() except: print 'Failed adding tif' img = None dataStore.close() #dataStore.close() if prj is not None: dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'],prj,grid,year)
def ingest_IMERG(startYYYYMMDD, endYYYYMMDD):
    '''
    Ingest daily IMERG GeoTIFFs for the date range [startYYYYMMDD, endYYYYMMDD]
    (inclusive) into the HDF datastore, then write the datatype's capabilities
    JSON to the local DB.

    :param startYYYYMMDD: first date to ingest, e.g. "20150308"; its year also
        selects which HDF file is opened for writing
    :param endYYYYMMDD: last date to ingest, e.g. "20151231"
    '''
    # Set the Datatype number
    current_DataTypeNumber = 34  # Hardcoded until there are more IMERG types in here..
    # Instance of Imerg Data Classes
    IMERG_DataClass = IDC.IMERG_Data()
    # Convert to dates
    dateFormat = "%Y%m%d"
    start_Date = datetime.datetime.strptime(startYYYYMMDD, dateFormat)
    end_Date = datetime.datetime.strptime(endYYYYMMDD, dateFormat)
    # Build expected string list: one {filename, year, month, day} record per day.
    dataset_Obj_List = []
    #expected_Tif_FileNames = []
    # # iterate through all dates
    delta = end_Date - start_Date
    for i in range(delta.days + 1):
        #print start_Date + datetime.timedelta(days=i)
        currentDate = start_Date + datetime.timedelta(days=i)
        tifFileName = IMERG_DataClass.get_Expected_Tif_FileName(currentDate.year, currentDate.month, currentDate.day)
        #expected_Tif_FileNames.append(tifFileName)
        obj_To_Append = {
            "Tif_File_Name":tifFileName,
            "year":currentDate.year,
            "month":currentDate.month,
            "day":currentDate.day
        }
        dataset_Obj_List.append(obj_To_Append)
    # Get the expected file names.
    # Folder where TIF and TFW files end up.
    input_Dataset_Folder = params.dataTypes[current_DataTypeNumber]['inputDataLocation']
    # Other vars needed for the loop
    itemsCounter = 0
    ingest_Error_List = []
    capabilities_DateFormatString = "%Y_%m_%d"
    last_YYYY_MM_DD_Processed = None
    # Ingest specific stuff
    yearForHDF = int(startYYYYMMDD[0:4])  # Year for HDF File
    dataStore = dataS.datastorage(current_DataTypeNumber, yearForHDF, forWriting=True)
    indexer = params.dataTypes[current_DataTypeNumber]['indexer']
    # Do the actual ingest.
    #for fileName in expected_Tif_FileNames:
    for currentObj in dataset_Obj_List:
        try:
            # Try to ingest the file, record error if there is an error
            # open the file
            fileName = currentObj['Tif_File_Name']
            fileToProcess = os.path.join(input_Dataset_Folder,fileName)
            print(fileToProcess)
            # Fallback chain over IMERG product versions: if the expected file
            # is missing, retry the name with "03E" -> "04A", then "04A" -> "04B".
            if os.path.isfile(fileToProcess):
                print("")
            else:
                fileToProcess=fileToProcess.replace("03E","04A")
                if os.path.isfile(fileToProcess):
                    print("")
                else:
                    fileToProcess=fileToProcess.replace("04A","04B")
            print("-Processing File: " + str(fileToProcess))
            # For some reason, we need to open TFW files instead of TIFs with GDAL..
            # NOTE(review): fileToProcess_TFW is computed but the TFW open below
            # is commented out, so this variable is currently unused.
            fileToProcess_TFW = IMERG_DataClass.convert_TIF_FileName_To_TFW_Filename(fileToProcess)
            theYear = yearForHDF  #currentObj['year']
            theMonth = currentObj['month']
            theDay = currentObj['day']
            print("before geotiff")
            # Open / Read the file
            #ds = georead.openGeoTiff(fileToProcess_TFW)
            ds = georead.openGeoTiff_WithUpdateFlag(fileToProcess)
            print("after geotiff")
            # Set a new projection (since the IMERG data does not come with one already..)
            ds.SetProjection(IMERG_DataClass.get_DefaultProjection_String())
            ds.SetGeoTransform(IMERG_DataClass.get_DefaultGeoTransform_Obj())
            # Get the values to save (just like in all the other ingest procedures.
            prj = ds.GetProjection()
            grid = ds.GetGeoTransform()
            # Index it.
            img = georead.readBandFromFile(ds, 1)
            print img
            ds = None
            index = indexer.getIndexBasedOnDate(theDay, theMonth, theYear)
            #print "Index:",index
            dataStore.putData(index, img)
            # Remember the last date written; becomes the capabilities end date.
            last_YYYY_MM_DD_Processed = str(theYear)+ "_" + str("%02d" % theMonth) + "_" + str("%02d" % theDay)
            itemsCounter += 1
        except:
            # Record the error for this file and carry on with the rest.
            e = sys.exc_info()[0]
            errorStr = "-ERROR Ingesting File: " + str(fileName) + " System Error Message: " + str(e)
            print(str(errorStr))
            ingest_Error_List.append(errorStr)
    # Close and save the data
    dataStore.close()
    if(itemsCounter > 0):
        dataS.writeSpatialInformation(params.dataTypes[current_DataTypeNumber]['directory'],prj,grid,yearForHDF)
        #print("Debug: processedFileNames: " + str(processedFileNames))
        #print("Debug: skippedFileNames: " + str(skippedFileNames))
        print("Finished processing, " + str(itemsCounter) + ", data items for year: " + str(yearForHDF))
        # need the projection and grid strings for the capabilities output.
        #retObject = {"projection":prj,"grid":grid}
        #return retObject
        # Update the capabilities
        try:
            print("-TODO, Check existing capabilities and overwrite only some parts rather than just overwriting with the last option... this was a shortcut taken to meet an expectation, budget about a day or so to fix this... right now, the last item ingested has it's date set as the 'END Date' for the capabilities range, (so if we are doing a simple reingest for a small subset in the middle of the data somewhere, this bug will show up..)")
            capabilities_Info = {
                "name":params.dataTypes[current_DataTypeNumber]['name'],
                "description":params.dataTypes[current_DataTypeNumber]['description'],
                "size":params.dataTypes[current_DataTypeNumber]['size'],
                "fillValue":params.dataTypes[current_DataTypeNumber]['fillValue'],
                "data_category":params.dataTypes[current_DataTypeNumber]['data_category'],
                "projection":prj,
                "grid":grid,
                # Get the start and end Date range.
                "startDateTime":"2015_03_08",
                "endDateTime":last_YYYY_MM_DD_Processed,
                "date_FormatString_For_ForecastRange":capabilities_DateFormatString
                # Other items to save?
            }
            # Write the capabilities info to the bddb
            theJSONString = json.dumps(capabilities_Info)
            # Create a connection to the DB, set the new values, close the connection
            conn = bdp.BDDbConnector_Capabilities()
            conn.set_DataType_Capabilities_JSON(current_DataTypeNumber, theJSONString)
            conn.close()
            print("-API Datatype Capabilities for datatype number: " +str(current_DataTypeNumber) + " written to local DB as: " + str(theJSONString))
        except:
            print("-WARNING: Data was ingested on this run AND there was an issue updating the API Capabilities local DB")
    else:
        print("No Items found for year: " + str(yearForHDF))
    print(str(len(ingest_Error_List)) + " errors associated with ingest items.")
    print("")
    print("Output of per-item Error Log: " + str(ingest_Error_List))
    print("")
def ingestSubProcess_Year(current_DataTypeNumber, year):
    '''
    Ingest all climate-change GeoTIFFs for a single year into the HDF
    datastore for current_DataTypeNumber.

    :param current_DataTypeNumber: key into params.dataTypes describing the dataset
    :param year: year whose files are ingested (also the HDF storage year)
    :return: dict with "projection" and "grid" of the last file ingested; both
        values are None when no matching files were found (previously this
        case raised NameError on the return-value construction).
    '''
    itemsCounter = 0
    inputYear = str(year)
    processedFileNames = []
    skippedFileNames = []
    # Projection/geotransform of the last ingested file; None until we see one.
    prj = None
    grid = None
    dataStore = dataS.datastorage(current_DataTypeNumber, year, forWriting=True)
    indexer = params.dataTypes[current_DataTypeNumber]['indexer']
    inputdir = params.dataTypes[current_DataTypeNumber]['inputDataLocation']
    print("inputdir: " + inputdir)
    # Iterate through each file and do the processing
    for filename in os.listdir(inputdir):
        if filename.endswith(".tif"):
            fileToProcess = os.path.join(inputdir, filename)
            directory, fileonly = os.path.split(fileToProcess)
            # Get the Year, Month and Day the file represents
            dictionary = get_YearMonthDay_Obj_From_ClimateChange_FileName(fileonly)
            # We only want items for the current year
            compareYear = str(dictionary['year'])
            if compareYear == inputYear:
                year = dictionary['year']
                month = dictionary['month']
                day = dictionary['day']
                # Open / Read the file
                ds = georead.openGeoTiff(fileToProcess)
                prj = ds.GetProjection()
                grid = ds.GetGeoTransform()
                img = georead.readBandFromFile(ds, 1)
                ds = None  # release the GDAL dataset handle
                index = indexer.getIndexBasedOnDate(day, month, year)
                dataStore.putData(index, img)
                processedFileNames.append(fileonly)
                itemsCounter += 1
            else:
                skippedFileNames.append(fileonly)
    # Close and save the data
    dataStore.close()
    print("data should be in ds now")
    if (itemsCounter > 0):
        print("trying to writeSpatialInformation")
        try:
            dataS.writeSpatialInformation(
                params.dataTypes[current_DataTypeNumber]['directory'], prj, grid, year)
        except Exception as e:
            print("Here's the error: " + str(e))
    print("Finished processing, " + str(itemsCounter) + ", data items for year: " + str(year))
    # need the projection and grid strings for the capabilities output.
    retObject = {"projection": prj, "grid": grid}
    return retObject
def processYearByDirectory(dataType, year, inputdir): ''' :param dataType: :param year: :param inputdir: ''' ###Process the incoming data dataStore = dataS.datastorage(dataType, year, forWriting=True) indexer = params.dataTypes[dataType]['indexer'] for filename in os.listdir(inputdir): if filename.endswith(".tif") and "chirps" in filename: fileToProcess = inputdir + "/" + filename print "Processing " + fileToProcess directory, fileonly = os.path.split(fileToProcess) dictionary = dateutils.breakApartChripsName(fileonly) year = dictionary['year'] month = dictionary['month'] day = dictionary['day'] sdate = "{0} {1} {2}".format(day, month, year) filedate = datetime.datetime.strptime(sdate, "%d %m %Y") ds = georead.openGeoTiff(fileToProcess) prj = ds.GetProjection() grid = ds.GetGeoTransform() time.sleep(1) img = georead.readBandFromFile(ds, 1) index = indexer.getIndexBasedOnDate(day, month, year) print "Index:", index try: changed = False with open('/data/data/cserv/www/html/json/stats.json', 'r+') as f: data = json.load(f) for item in data['items']: if (item['name'] == 'chirps'): ldatestring = item['Latest'] ldate = datetime.datetime.strptime( ldatestring, "%d %m %Y") if ldate < filedate: item['Latest'] = sdate changed = True if changed: f.seek( 0 ) # <--- should reset file position to the beginning. json.dump(data, f, indent=4) f.truncate() # remove remaining part except Exception as e: print("******************" + e + "****************************") pass time.sleep(1) dataStore.putData(index, img) time.sleep(1) ds = None dataStore.close() dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'], prj, grid, year)
def ingest_CHIRPSMonthly(startYYYYMM, endYYYYMM):
    '''
    Ingest CHIRPS monthly GeoTIFFs for the month range [startYYYYMM, endYYYYMM]
    (inclusive) into the HDF datastore, then write the datatype's capabilities
    JSON to the local DB.

    :param startYYYYMM: first month to ingest, e.g. "198501"; its year also
        selects which HDF file is opened for writing
    :param endYYYYMM: last month to ingest, e.g. "198512"
    '''
    # Set the Datatype number
    current_DataTypeNumber = 28  # Hardcoded until there is a better way to get this information (maybe params DB?)

    # Convert to dates
    dateFormat = "%Y%m"
    start_Date = datetime.datetime.strptime(startYYYYMM, dateFormat)
    end_Date = datetime.datetime.strptime(endYYYYMM, dateFormat)

    # Build the list of expected files (one per month in the range).
    dataset_Obj_List = []
    end_Date = add_months(end_Date, 1)  # this is to fix that hacky while loop found below
    tempDate = start_Date
    while ((end_Date - tempDate).days > 0):
        # Date to be used inside the while loop
        currentDate = tempDate
        # Get the expected filename
        # something like this should be part of a dataclasses object
        tifFileName = "chirps-v2.0." + str(currentDate.year) + "." + str("%02d" % currentDate.month) + ".tif"
        # append the object
        obj_To_Append = {
            "Tif_File_Name": tifFileName,
            "year": currentDate.year,
            "month": currentDate.month,
            "day": currentDate.day
        }
        dataset_Obj_List.append(obj_To_Append)
        # Increment and set new temp value for while loop
        tempDate = add_months(tempDate, 1)

    # Folder where TIF files end up after download.
    input_Dataset_Folder = params.dataTypes[current_DataTypeNumber]['inputDataLocation']

    # Other vars needed for the loop
    itemsCounter = 0
    ingest_Error_List = []
    capabilities_DateFormatString = "%Y_%m"
    # BUGFIX: this was initialized under the wrong name
    # (last_YYYY_MM_DD_Processed), leaving the variable actually used below
    # undefined until the first successful ingest.
    last_YYYY_MM_Processed = None

    # Ingest specific stuff
    yearForHDF = int(startYYYYMM[0:4])  # Year for HDF File
    dataStore = dataS.datastorage(current_DataTypeNumber, yearForHDF, forWriting=True)
    indexer = params.dataTypes[current_DataTypeNumber]['indexer']

    # Do the actual ingest.
    for currentObj in dataset_Obj_List:
        try:
            # Try to ingest the file, record error if there is an error
            fileName = currentObj['Tif_File_Name']
            fileToProcess = os.path.join(input_Dataset_Folder, fileName)
            print("-Processing File: " + str(fileToProcess))
            theYear = yearForHDF  #currentObj['year']
            theMonth = currentObj['month']
            theDay = 1  #currentObj['day'] # Monthly datasets use the first day of each month.
            # Open / Read the file
            # BUGFIX: removed "time.sleep(t)" — 't' was never defined, so every
            # single file raised NameError, was swallowed by the except below,
            # and nothing was ever ingested.
            ds = georead.openGeoTiff(fileToProcess)
            # If the dataset format does not come with a correct projection and
            # transform, this is where to override them (see ingest_IMERG).
            prj = ds.GetProjection()
            grid = ds.GetGeoTransform()
            # Index it.
            img = georead.readBandFromFile(ds, 1)
            ds = None  # release the GDAL dataset handle
            index = indexer.getIndexBasedOnDate(theDay, theMonth, theYear)
            dataStore.putData(index, img)
            # Remember the last month written; becomes the capabilities end date.
            last_YYYY_MM_Processed = str(theYear) + "_" + str("%02d" % theMonth)
            itemsCounter += 1
        except:
            # Record the error for this file and carry on with the rest.
            e = sys.exc_info()[0]
            errorStr = "-ERROR Ingesting File: " + str(fileName) + " System Error Message: " + str(e)
            print(str(errorStr))
            ingest_Error_List.append(errorStr)

    # Close and save the data
    dataStore.close()
    if (itemsCounter > 0):
        dataS.writeSpatialInformation(
            params.dataTypes[current_DataTypeNumber]['directory'], prj, grid, yearForHDF)
        print("Finished processing, " + str(itemsCounter) + ", data items for year: " + str(yearForHDF))
        # Update the capabilities
        try:
            print("-TODO, Check existing capabilities and overwrite only some parts rather than just overwriting with the last option... this was a shortcut taken to meet an expectation, budget about a day or so to fix this... right now, the last item ingested has it's date set as the 'END Date' for the capabilities range, (so if we are doing a simple reingest for a small subset in the middle of the data somewhere, this bug will show up..)")
            capabilities_Info = {
                "name": params.dataTypes[current_DataTypeNumber]['name'],
                "description": params.dataTypes[current_DataTypeNumber]['description'],
                "size": params.dataTypes[current_DataTypeNumber]['size'],
                "fillValue": params.dataTypes[current_DataTypeNumber]['fillValue'],
                "data_category": params.dataTypes[current_DataTypeNumber]['data_category'],
                "projection": prj,
                "grid": grid,
                # Start is fixed to the dataset epoch; end is the last month ingested.
                "startDateTime": "1985_01",
                "endDateTime": last_YYYY_MM_Processed,
                "date_FormatString_For_ForecastRange": capabilities_DateFormatString
                # Other items to save?
            }
            # Write the capabilities info to the bddb
            theJSONString = json.dumps(capabilities_Info)
            # Create a connection to the DB, set the new values, close the connection
            conn = bdp.BDDbConnector_Capabilities()
            conn.set_DataType_Capabilities_JSON(current_DataTypeNumber, theJSONString)
            conn.close()
            print("-API Datatype Capabilities for datatype number: " + str(current_DataTypeNumber) + " written to local DB as: " + str(theJSONString))
        except:
            print("-WARNING: Data was ingested on this run AND there was an issue updating the API Capabilities local DB")
    else:
        print("No Items found for year: " + str(yearForHDF))
    print(str(len(ingest_Error_List)) + " errors associated with ingest items.")
    print("")
    print("Output of per-item Error Log: " + str(ingest_Error_List))
    print("")
def processYearByDirectory(dataType,year, inputdir, nlastdate): ''' :param dataType: :param year: :param inputdir: ''' ###Process the incoming data print inputdir dataupdated = False dataStore = dataS.datastorage(dataType, year, forWriting=True) indexer = params.dataTypes[dataType]['indexer'] for filename in os.listdir(inputdir): filesplit = filename.split('.') fyear = filesplit[1] fmonth = filesplit[2][:2] fday = filesplit[2][2:] fdatestring = fday + " " + fmonth + " " + fyear fdate = datetime.datetime.strptime(fdatestring, "%d %m %Y") if fdate > nlastdate: if filename.endswith(".tif") and os.stat(inputdir+"/"+filename).st_size > 0: dataupdated = True fileToProcess = inputdir+"/"+filename print "Processing "+fileToProcess directory, fileonly = os.path.split(fileToProcess) dictionary = dateutils.breakApartGEFSNewName(fileonly) #name convention changed, update needed year = dictionary['year'] month = dictionary['month'] day = dictionary['day'] sdate = "{0} {1} {2}".format(day, month, year) filedate = datetime.datetime.strptime(sdate, "%d %m %Y") ds = georead.openGeoTiff(fileToProcess) prj=ds.GetProjection() grid = ds.GetGeoTransform() # day = decad * 10 # if month == int(2) and day == int(30): # day = 28 img = georead.readBandFromFile(ds, 1) ds = None index = indexer.getIndexBasedOnDate(day,month,year) print "Index:",index try: changed = False with open('/data/data/cserv/www/html/json/stats.json', 'r+') as f: data = json.load(f) for item in data['items']: if(item['name'] == 'gefsprecip'): ldatestring = item['Latest'] ldate = datetime.datetime.strptime(ldatestring, "%d %m %Y") if ldate < filedate: print("file date is later") item['Latest'] = sdate changed = True if changed: f.seek(0) # <--- should reset file position to the beginning. 
json.dump(data, f, indent=4) f.truncate() # remove remaining part except Exception as e: print(e) pass dataStore.putData(index, img) dataStore.close() if dataupdated: dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'],prj,grid,year)