Example #1
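All of these snippets appear to come from the same ingest module and share its imports. A minimal sketch of what they assume (the project modules are repository-specific; their names are taken from the code itself and the import paths are assumptions):

import os
import sys
import json
import time
import datetime

import numpy as np

# Project-specific modules, named as the snippets use them (locations assumed):
import dataS      # HDF-backed datastorage and writeSpatialInformation
import params     # the params.dataTypes configuration table
import georead    # GDAL GeoTIFF helpers
import dateutils  # filename date parsers
import bdp        # BDDbConnector_Capabilities (Examples #5 and #8)
import ImergDataClasses as IDC  # IMERG_Data helper (Example #5); module name assumed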
def processYearByDirectory(dataType, year, inputdir, ldate):
    '''
    Ingest every GeoTIFF in inputdir that is dated after ldate.

    :param dataType: key into params.dataTypes
    :param year: year of the target HDF datastore
    :param inputdir: directory containing the incoming .tif files
    :param ldate: last date already ingested; only newer files are processed
    '''
    ### Process the incoming data
    dataupdated = False
    dataStore = dataS.datastorage(dataType, year, forWriting=True)
    indexer = params.dataTypes[dataType]['indexer']
    for filename in os.listdir(inputdir):
        # File names follow <prefix>.YYYY.MMDD..., e.g. name.2019.0215.tif
        filesplit = filename.split('.')
        fyear = filesplit[1]
        fmonth = filesplit[2][:2]
        fday = filesplit[2][2:]
        fdatestring = fday + " " + fmonth + " " + fyear
        fdate = datetime.datetime.strptime(fdatestring, "%d %m %Y")
        if fdate > ldate:
            if filename.endswith(".tif") and os.stat(inputdir + "/" + filename).st_size > 0:
                dataupdated = True
                fileToProcess = inputdir + "/" + filename
                print "Processing " + fileToProcess
                directory, fileonly = os.path.split(fileToProcess)

                dictionary = dateutils.breakApartGEFSNewName(fileonly)  # name convention changed, update needed
                year = dictionary['year']
                month = dictionary['month']
                day = dictionary['day']
                sdate = "{0} {1} {2}".format(day, month, year)
                filedate = datetime.datetime.strptime(sdate, "%d %m %Y")
                ds = georead.openGeoTiff(fileToProcess)
                prj = ds.GetProjection()
                grid = ds.GetGeoTransform()
                img = georead.readBandFromFile(ds, 1)
                ds = None
                index = indexer.getIndexBasedOnDate(day, month, year)
                print "Index:", index
                # Only fill the slot if it still holds the fill value everywhere.
                c = np.array(dataStore.getData(index))
                if (c == -9999).all():
                    dataStore.putData(index, img)
                else:
                    print fdate.strftime('%Y.%m.%d') + " data already in hdf"
        else:
            print "file date before last date"
    dataStore.close()
    if dataupdated:
        dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'], prj, grid, year)
Example #2
def processYearByDirectory(dataType, year, inputdir):
    '''
    Ingest every GeoTIFF in inputdir into the year's HDF datastore.

    :param dataType: key into params.dataTypes
    :param year: year of the target HDF datastore
    :param inputdir: directory containing the incoming .tif files
    '''
    ### Process the incoming data
    prj = None
    grid = None
    dataStore = dataS.datastorage(dataType, year, forWriting=True)
    indexer = params.dataTypes[dataType]['indexer']
    for filename in os.listdir(inputdir):
        if filename.endswith(".tif"):

            fileToProcess = inputdir + "/" + filename
            print "Processing " + fileToProcess
            directory, fileonly = os.path.split(fileToProcess)

            dictionary = dateutils.breakApartSmapName(fileonly)
            year = int(dictionary['year'])
            day = int(dictionary['day'])
            month = int(dictionary['month'])

            ds = georead.openGeoTiff(fileToProcess)
            prj = ds.GetProjection()
            grid = ds.GetGeoTransform()

            img = georead.readBandFromFile(ds, 1)
            ds = None
            index = indexer.getIndexBasedOnDate(day, month, year)
            print "Index:", index
            dataStore.putData(index, img)

    dataStore.close()
    if prj is not None:  # skip the write when no .tif was processed
        dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'], prj,
                                      grid, year)
Example #3
def processYearByDirectory(dataType, year, inputdir):
    '''
    Ingest ESI GeoTIFFs in inputdir that are newer than the date recorded in stats.json.

    :param dataType: key into params.dataTypes (29 selects esi4week, otherwise esi12week)
    :param year: year of the target HDF datastore
    :param inputdir: directory containing the incoming .tif files
    '''
    ### Process the incoming data
    global dataName  # read by the getESIDate filter helper defined elsewhere
    if dataType == 29:
        dataName = 'esi4week'
    else:
        dataName = 'esi12week'
    filePattern = None
    with open('/data/data/cserv/www/html/json/stats.json', 'r+') as f:
        data = json.load(f)
        theDate = filter(getESIDate, data['items'])[0]['Latest']
        # Build "YYYYDDD" (year plus zero-padded day of year), e.g. "15 02 2019" -> "2019046".
        parsed = datetime.datetime.strptime(theDate, '%d %m %Y')
        filePattern = theDate.split(' ')[2] + "%03d" % (
            (parsed - datetime.datetime(parsed.year, 1, 1)).days + 1)

    dataStore = dataS.datastorage(dataType, year, forWriting=True)
    indexer = params.dataTypes[dataType]['indexer']
    for filename in os.listdir(inputdir):
        if filename.endswith(".tif") and int(
                getDatePattern(filename)) > int(filePattern):

            fileToProcess = inputdir + "/" + filename
            print "Processing " + fileToProcess
            directory, fileonly = os.path.split(fileToProcess)

            dictionary = dateutils.breakApartEsiName(fileonly)
            year = dictionary['year']
            month = dictionary['month']
            day = dictionary['day']
            sdate = "{0} {1} {2}".format(day, month, year)
            filedate = datetime.datetime.strptime(sdate, "%d %m %Y")
            ds = georead.openGeoTiff(fileToProcess)
            prj = ds.GetProjection()
            grid = ds.GetGeoTransform()

            img = georead.readBandFromFile(ds, 1)
            ds = None
            index = indexer.getIndexBasedOnDate(day, month, year)
            print "Index:", index
            try:
                changed = False
                with open('/data/data/cserv/www/html/json/stats.json',
                          'r+') as f:
                    data = json.load(f)
                    for item in data['items']:
                        print(item['name'])
                        if (item['name'] == dataName):
                            ldatestring = item['Latest']
                            ldate = datetime.datetime.strptime(
                                ldatestring, "%d %m %Y")
                            print("in here")
                            print("ldate: " + str(ldate))
                            if ldate < filedate:
                                print("file date is later")
                                item['Latest'] = sdate
                                changed = True
                    if changed:
                        f.seek(
                            0
                        )  # <--- should reset file position to the beginning.
                        json.dump(data, f, indent=4)
                        f.truncate()  # remove remaining part
            except Exception as e:
                print(e)
            dataStore.putData(index, img)

    dataStore.close()
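Several examples on this page repeat the same read-modify-write update of stats.json. A minimal standalone sketch of that pattern (the path and the items/name/Latest structure are taken from the snippets above; the function name is illustrative):

def update_latest_date(stats_path, item_name, sdate):
    # Rewrite stats.json in place if sdate ("DD MM YYYY") is newer than the stored 'Latest'.
    filedate = datetime.datetime.strptime(sdate, "%d %m %Y")
    with open(stats_path, 'r+') as f:
        data = json.load(f)
        changed = False
        for item in data['items']:
            if item['name'] == item_name:
                ldate = datetime.datetime.strptime(item['Latest'], "%d %m %Y")
                if ldate < filedate:
                    item['Latest'] = sdate
                    changed = True
        if changed:
            f.seek(0)      # rewind so the dump overwrites from the start
            json.dump(data, f, indent=4)
            f.truncate()   # drop leftover bytes if the new JSON is shorter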
Example #4
def processYearByDirectory(dataType, year, inputdir):
    '''
    Ingest eMODIS NDVI GeoTIFFs in inputdir, rescaling byte values to NDVI.

    :param dataType: key into params.dataTypes
    :param year: year of the target HDF datastore
    :param inputdir: directory containing the incoming .tif files
    '''
    ### Process the incoming data
    prj = None
    indexer = params.dataTypes[dataType]['indexer']
    for filename in os.listdir(inputdir):
        if filename.endswith(".tif"):
            try:
                dataStore = dataS.datastorage(dataType, year, forWriting=True)
                fileToProcess = inputdir + "/" + filename
                print "Processing " + fileToProcess
                directory, fileonly = os.path.split(fileToProcess)

                dictionary = dateutils.breakApartemodisNameAdjust(fileonly, 3)
                year = dictionary['year']
                month = dictionary['month']
                day = dictionary['day']
                sdate = "{0} {1} {2}".format(day, month, year)
                filedate = datetime.datetime.strptime(sdate, "%d %m %Y")
                ds = georead.openGeoTiff(fileToProcess)
                prj = ds.GetProjection()
                grid = ds.GetGeoTransform()

                img = georead.readBandFromFile(ds, 1)
                # Some rasters arrive one pixel wider or taller than expected;
                # np.delete drops the extra column/row and raises IndexError
                # (ignored) when the raster is already the expected size.
                try:
                    xSize = params.dataTypes[dataType]['size'][0]
                    img = np.delete(img, (xSize), axis=1)
                    print("Accounting for pixel width differences.")
                except (KeyError, IndexError):
                    pass
                try:
                    ySize = params.dataTypes[dataType]['size'][1]
                    img = np.delete(img, (ySize), axis=0)
                    print("Accounting for pixel height differences.")
                except (KeyError, IndexError):
                    pass
                ### Manipulate the data as per the FEWS.NET data document to get NDVI.
                # eMODIS NDVI data are stretched (mapped) linearly to byte values:
                # [-1.0, 1.0] -> [0, 200]; invalid values: 201 - 255
                # NDVI = (value - 100) / 100; example: (150 - 100) / 100 = 0.5 NDVI
                validmask = np.where(img <= 200)
                invalidmask = np.where((img > 200) | (img < 100))  # values below 100 are filled too
                img = img.astype(np.float32)
                img[validmask] = (img[validmask] - 100) / 100.
                img[invalidmask] = params.dataTypes[dataType]['fillValue']

                ds = None
                index = indexer.getIndexBasedOnDate(day, month, year)
                print month, "/", day, "/", year, "--Index->", index
                try:
                    changed = False
                    with open('/data/data/cserv/www/html/json/stats.json', 'r+') as f:
                        data = json.load(f)
                        for item in data['items']:
                            if item['name'] == 'casndvi':
                                ldatestring = item['Latest']
                                ldate = datetime.datetime.strptime(ldatestring, "%d %m %Y")
                                if ldate < filedate:
                                    print("file date is later")
                                    item['Latest'] = sdate
                                    changed = True
                        if changed:
                            f.seek(0)  # reset file position to the beginning
                            json.dump(data, f, indent=4)
                            f.truncate()  # remove remaining part
                except Exception as e:
                    print(e)
                dataStore.putData(index, img)
                img = None
                dataStore.close()
            except Exception as e:
                print 'Failed adding tif:', str(e)
                img = None
                dataStore.close()
    if prj is not None:
        dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'], prj, grid, year)
Example #5
def ingest_IMERG(startYYYYMMDD, endYYYYMMDD):

    # Set the Datatype number
    current_DataTypeNumber = 34  # Hardcoded until there are more IMERG types in here..
    
    # Instance of Imerg Data Classes
    IMERG_DataClass = IDC.IMERG_Data()
    
    # Convert to dates
    dateFormat = "%Y%m%d"
    start_Date = datetime.datetime.strptime(startYYYYMMDD, dateFormat)
    end_Date = datetime.datetime.strptime(endYYYYMMDD, dateFormat)
    
    # Build expected string list
    dataset_Obj_List = []
    #expected_Tif_FileNames = [] # 
    
    # iterate through all dates
    delta = end_Date - start_Date
    for i in range(delta.days + 1):
        #print start_Date + datetime.timedelta(days=i)
        currentDate = start_Date + datetime.timedelta(days=i)
        tifFileName = IMERG_DataClass.get_Expected_Tif_FileName(currentDate.year, currentDate.month, currentDate.day)
        #expected_Tif_FileNames.append(tifFileName)
        obj_To_Append = {
               "Tif_File_Name":tifFileName,
               "year":currentDate.year,
               "month":currentDate.month,
               "day":currentDate.day
               }
        dataset_Obj_List.append(obj_To_Append)
    
    # Get the expected file names.
    
    # Folder where TIF and TFW files end up.
    input_Dataset_Folder = params.dataTypes[current_DataTypeNumber]['inputDataLocation']

    # Other vars needed for the loop
    itemsCounter = 0
    ingest_Error_List = []
    capabilities_DateFormatString = "%Y_%m_%d"
    last_YYYY_MM_DD_Processed = None
    
    # Ingest specific stuff
    yearForHDF = int(startYYYYMMDD[0:4])  # Year for HDF File
    dataStore = dataS.datastorage(current_DataTypeNumber, yearForHDF, forWriting=True)
    indexer = params.dataTypes[current_DataTypeNumber]['indexer']
    
    # Do the actual ingest.
    #for fileName in expected_Tif_FileNames:
    for currentObj in dataset_Obj_List:
        
        try:
            # Try to ingest the file, record error if there is an error
            
            # open the file
            fileName = currentObj['Tif_File_Name']
            fileToProcess = os.path.join(input_Dataset_Folder, fileName)
            print(fileToProcess)
            # IMERG files can carry different product version suffixes;
            # fall back from 03E to 04A to 04B until a file is found.
            if not os.path.isfile(fileToProcess):
                fileToProcess = fileToProcess.replace("03E", "04A")
                if not os.path.isfile(fileToProcess):
                    fileToProcess = fileToProcess.replace("04A", "04B")
            print("-Processing File: " + str(fileToProcess))
            
            # A TFW-based open was used at one point; the TIF is opened directly below.
            fileToProcess_TFW = IMERG_DataClass.convert_TIF_FileName_To_TFW_Filename(fileToProcess)  # currently unused

            theYear = yearForHDF #currentObj['year']
            theMonth = currentObj['month']
            theDay = currentObj['day']
            print("before geotiff")
            # Open / Read the file
            #ds = georead.openGeoTiff(fileToProcess_TFW)
            ds = georead.openGeoTiff_WithUpdateFlag(fileToProcess)
            print("after geotiff")

            # Set a new projection (since the IMERG data does not come with one already..)
            ds.SetProjection(IMERG_DataClass.get_DefaultProjection_String())
            ds.SetGeoTransform(IMERG_DataClass.get_DefaultGeoTransform_Obj())
            
            # Get the values to save (just like in all the other ingest procedures).
            prj = ds.GetProjection()
            grid = ds.GetGeoTransform()
                
            # Index it.
            img = georead.readBandFromFile(ds, 1)
            print img
            ds = None
            index = indexer.getIndexBasedOnDate(theDay, theMonth, theYear)
            dataStore.putData(index, img)
            last_YYYY_MM_DD_Processed = str(theYear) + "_" + str("%02d" % theMonth) + "_" + str("%02d" % theDay)
            itemsCounter += 1
        except:
            # do something in the event of an error
            e = sys.exc_info()[0]
            errorStr = "-ERROR Ingesting File: " + str(fileName) + " System Error Message: " + str(e)
            print(str(errorStr))
            ingest_Error_List.append(errorStr)
        
    # Close and save the data
    dataStore.close()
    
    if itemsCounter > 0:
        dataS.writeSpatialInformation(params.dataTypes[current_DataTypeNumber]['directory'],prj,grid,yearForHDF)

        #print("Debug: processedFileNames: " + str(processedFileNames))
        #print("Debug: skippedFileNames: " + str(skippedFileNames))
        print("Finished processing, " + str(itemsCounter) + ", data items for year: " + str(yearForHDF))
    
        # need the projection and grid strings for the capabilities output.
        #retObject = {"projection":prj,"grid":grid}
    
        #return retObject
    
        # Update the capabilities
        try:
            print("-TODO, Check existing capabilities and overwrite only some parts rather than just overwriting with the last option... this was a shortcut taken to meet an expectation, budget about a day or so to fix this... right now, the last item ingested has it's date set as the 'END Date' for the capabilities range, (so if we are doing a simple reingest for a small subset in the middle of the data somewhere, this bug will show up..)")
            
            
            capabilities_Info = {
                "name": params.dataTypes[current_DataTypeNumber]['name'],
                "description": params.dataTypes[current_DataTypeNumber]['description'],
                "size": params.dataTypes[current_DataTypeNumber]['size'],
                "fillValue": params.dataTypes[current_DataTypeNumber]['fillValue'],
                "data_category": params.dataTypes[current_DataTypeNumber]['data_category'],
                "projection": prj,
                "grid": grid,
                # Start and end of the date range.
                "startDateTime": "2015_03_08",
                "endDateTime": last_YYYY_MM_DD_Processed,
                "date_FormatString_For_ForecastRange": capabilities_DateFormatString
                # Other items to save?
            }
    
            # Write the capabilities info to the bddb
            theJSONString = json.dumps(capabilities_Info)
            # Create a connection to the DB, set the new values, close the connection
            conn = bdp.BDDbConnector_Capabilities()
            conn.set_DataType_Capabilities_JSON(current_DataTypeNumber, theJSONString)
            conn.close()
            
            print("-API Datatype Capabilities for datatype number: " +str(current_DataTypeNumber) + " written to local DB as: " + str(theJSONString))
            
        except:
            print("-WARNING: Data was ingested on this run AND there was an issue updating the API Capabilities local DB")
    
    else:
        print("No Items found for year: " + str(yearForHDF))
        print(str(len(ingest_Error_List)) + " errors associated with ingest items.")
        
    print("")
    print("Output of per-item Error Log: " + str(ingest_Error_List))
    print("")
Example #6
def ingestSubProcess_Year(current_DataTypeNumber, year):

    itemsCounter = 0
    inputYear = str(year)
    processedFileNames = []
    skippedFileNames = []

    dataStore = dataS.datastorage(current_DataTypeNumber,
                                  year,
                                  forWriting=True)
    indexer = params.dataTypes[current_DataTypeNumber]['indexer']
    inputdir = params.dataTypes[current_DataTypeNumber]['inputDataLocation']
    print("inputdir: " + inputdir)
    # Iterate through each file and do the processing
    for filename in os.listdir(inputdir):
        if filename.endswith(".tif"):
            fileToProcess = os.path.join(inputdir, filename)

            #print("Processing "+ str(fileToProcess))
            directory, fileonly = os.path.split(fileToProcess)

            # Get the Year, Month and Day the file represents
            dictionary = get_YearMonthDay_Obj_From_ClimateChange_FileName(
                fileonly)  # dateutils.breakApartChripsName(fileonly)

            # We only want items for the current year
            compareYear = str(dictionary['year'])
            #print("compareYear: " + compareYear)
            if compareYear == inputYear:
                year = dictionary['year']
                month = dictionary['month']
                day = dictionary['day']

                # Open / Read the file
                #print("opening ds")
                ds = georead.openGeoTiff(fileToProcess)
                #print("GetProjection")
                prj = ds.GetProjection()
                #print("GetGeoTransform")
                grid = ds.GetGeoTransform()
                #print("readBandFromFile")
                # Index it.
                img = georead.readBandFromFile(ds, 1)
                ds = None
                #print("getIndexBasedOnDate")
                index = indexer.getIndexBasedOnDate(day, month, year)
                #print "Index:",index
                dataStore.putData(index, img)
                #print("putData")
                processedFileNames.append(fileonly)
                #print("processedFileNames")
                itemsCounter += 1
            else:
                skippedFileNames.append(fileonly)

    # Close and save the data
    dataStore.close()
    print("data should be in ds now")
    if itemsCounter > 0:
        print("trying to writeSpatialInformation")
        try:
            dataS.writeSpatialInformation(
                params.dataTypes[current_DataTypeNumber]['directory'], prj,
                grid, year)
        except Exception as e:
            print("Here's the error: " + str(e))
        #print("Debug: processedFileNames: " + str(processedFileNames))
        #print("Debug: skippedFileNames: " + str(skippedFileNames))
        print("Finished processing, " + str(itemsCounter) +
              ", data items for year: " + str(year))

        # need the projection and grid strings for the capabilities output.
        retObject = {"projection": prj, "grid": grid}

        return retObject
Example #7
def processYearByDirectory(dataType, year, inputdir):
    '''
    Ingest CHIRPS GeoTIFFs in inputdir and record the latest date in stats.json.

    :param dataType: key into params.dataTypes
    :param year: year of the target HDF datastore
    :param inputdir: directory containing the incoming .tif files
    '''
    ### Process the incoming data
    prj = None
    grid = None
    dataStore = dataS.datastorage(dataType, year, forWriting=True)
    indexer = params.dataTypes[dataType]['indexer']
    for filename in os.listdir(inputdir):
        if filename.endswith(".tif") and "chirps" in filename:

            fileToProcess = inputdir + "/" + filename
            print "Processing " + fileToProcess
            directory, fileonly = os.path.split(fileToProcess)

            dictionary = dateutils.breakApartChripsName(fileonly)
            year = dictionary['year']
            month = dictionary['month']
            day = dictionary['day']
            sdate = "{0} {1} {2}".format(day, month, year)
            filedate = datetime.datetime.strptime(sdate, "%d %m %Y")
            ds = georead.openGeoTiff(fileToProcess)
            prj = ds.GetProjection()
            grid = ds.GetGeoTransform()
            time.sleep(1)
            img = georead.readBandFromFile(ds, 1)

            index = indexer.getIndexBasedOnDate(day, month, year)
            print "Index:", index
            try:
                changed = False
                with open('/data/data/cserv/www/html/json/stats.json',
                          'r+') as f:
                    data = json.load(f)
                    for item in data['items']:
                        if (item['name'] == 'chirps'):
                            ldatestring = item['Latest']
                            ldate = datetime.datetime.strptime(
                                ldatestring, "%d %m %Y")
                            if ldate < filedate:
                                item['Latest'] = sdate
                                changed = True
                    if changed:
                        f.seek(
                            0
                        )  # <--- should reset file position to the beginning.
                        json.dump(data, f, indent=4)
                        f.truncate()  # remove remaining part
            except Exception as e:
                print("******************" + str(e) +
                      "****************************")
            time.sleep(1)
            dataStore.putData(index, img)
            time.sleep(1)
            ds = None
    dataStore.close()
    if prj is not None:  # skip the write when no matching .tif was processed
        dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'], prj,
                                      grid, year)
Example #8
def ingest_CHIRPSMonthly(startYYYYMM, endYYYYMM):
    # Set the Datatype number
    current_DataTypeNumber = 28  # Hardcoded until there is a better way to get this information (maybe params DB?)

    # Data Classes?

    # Convert to dates
    dateFormat = "%Y%m"
    start_Date = datetime.datetime.strptime(startYYYYMM, dateFormat)
    end_Date = datetime.datetime.strptime(endYYYYMM, dateFormat)

    # Build expected string list
    dataset_Obj_List = []

    # add_months (defined elsewhere; a sketch appears after this example) advances a date by whole months.
    end_Date = add_months(end_Date, 1)  # extend by one month so the exclusive loop below includes endYYYYMM
    tempDate = start_Date
    while (end_Date - tempDate).days > 0:
        # Date to be used inside the while loop
        currentDate = tempDate

        # From the FTP downloader for Chirps Monthly
        #theCurrentPath = ftp_FolderPath + "chirps-v2.0." + str(currentDate.year) + "." + str("%02d" % currentDate.month) + ".tif.gz"
        #expected_FTP_FilePaths.append(theCurrentPath)
        #print("-Expected Path: " + str(theCurrentPath))

        # Get the expected filename  # something like this should be part of a dataclasses object
        tifFileName = "chirps-v2.0." + str(currentDate.year) + "." + str(
            "%02d" % currentDate.month) + ".tif"

        # append the object
        obj_To_Append = {
            "Tif_File_Name": tifFileName,
            "year": currentDate.year,
            "month": currentDate.month,
            "day": currentDate.day
        }
        dataset_Obj_List.append(obj_To_Append)

        # Increment and set new temp value for while loop
        currentDate = add_months(tempDate, 1)
        tempDate = currentDate

    # Folder where TIF files end up after download.
    input_Dataset_Folder = params.dataTypes[current_DataTypeNumber][
        'inputDataLocation']

    # Other vars needed for the loop
    itemsCounter = 0
    ingest_Error_List = []
    capabilities_DateFormatString = "%Y_%m"
    last_YYYY_MM_DD_Processed = None

    # Ingest specific stuff
    yearForHDF = int(startYYYYMM[0:4])  # Year for HDF File
    dataStore = dataS.datastorage(current_DataTypeNumber,
                                  yearForHDF,
                                  forWriting=True)
    indexer = params.dataTypes[current_DataTypeNumber]['indexer']

    # Do the actual ingest.
    for currentObj in dataset_Obj_List:

        try:
            # Try to ingest the file, record error if there is an error

            # open the file
            fileName = currentObj['Tif_File_Name']
            fileToProcess = os.path.join(input_Dataset_Folder, fileName)

            print("-Processing File: " + str(fileToProcess))

            theYear = yearForHDF  #currentObj['year']
            theMonth = currentObj['month']
            theDay = 1  #currentObj['day'] # Monthly datasets use the first day of each month.

            # Open / Read the file
            ds = georead.openGeoTiff(fileToProcess)
            #ds = georead.openGeoTiff_WithUpdateFlag(fileToProcess)
            time.sleep(1)  # pause between reads; duration assumed ('t' is not defined in this snippet)
            # If the dataset format does not come with a correct projection and transform,
            # this is where to override them (see the IMERG example above):
            #ds.SetProjection(IMERG_DataClass.get_DefaultProjection_String())
            #ds.SetGeoTransform(IMERG_DataClass.get_DefaultGeoTransform_Obj())

            # Get the values to save (just like in all the other ingest procedures).
            prj = ds.GetProjection()
            grid = ds.GetGeoTransform()

            # Index it.
            img = georead.readBandFromFile(ds, 1)
            ds = None
            index = indexer.getIndexBasedOnDate(theDay, theMonth, theYear)
            dataStore.putData(index, img)
            last_YYYY_MM_Processed = str(theYear) + "_" + str("%02d" % theMonth)
            itemsCounter += 1
            itemsCounter += 1

        except:
            # do something in the event of an error
            e = sys.exc_info()[0]
            errorStr = "-ERROR Ingesting File: " + str(
                fileName) + " System Error Message: " + str(e)
            print(str(errorStr))
            ingest_Error_List.append(errorStr)

    # Close and save the data
    dataStore.close()

    if itemsCounter > 0:
        dataS.writeSpatialInformation(
            params.dataTypes[current_DataTypeNumber]['directory'], prj, grid,
            yearForHDF)

        #print("Debug: processedFileNames: " + str(processedFileNames))
        #print("Debug: skippedFileNames: " + str(skippedFileNames))
        print("Finished processing, " + str(itemsCounter) +
              ", data items for year: " + str(yearForHDF))

        # need the projection and grid strings for the capabilities output.
        #retObject = {"projection":prj,"grid":grid}

        #return retObject

        # Update the capabilities
        try:
            print(
                "-TODO, Check existing capabilities and overwrite only some parts rather than just overwriting with the last option... this was a shortcut taken to meet an expectation; budget about a day or so to fix this... right now, the last item ingested has its date set as the 'END Date' for the capabilities range (so if we are doing a simple reingest for a small subset in the middle of the data somewhere, this bug will show up..)"
            )

            capabilities_Info = {
                "name": params.dataTypes[current_DataTypeNumber]['name'],
                "description": params.dataTypes[current_DataTypeNumber]['description'],
                "size": params.dataTypes[current_DataTypeNumber]['size'],
                "fillValue": params.dataTypes[current_DataTypeNumber]['fillValue'],
                "data_category": params.dataTypes[current_DataTypeNumber]['data_category'],
                "projection": prj,
                "grid": grid,
                # Start and end of the date range.
                "startDateTime": "1985_01",
                "endDateTime": last_YYYY_MM_Processed,
                "date_FormatString_For_ForecastRange": capabilities_DateFormatString
                # Other items to save?
            }

            # Write the capabilities info to the bddb
            theJSONString = json.dumps(capabilities_Info)
            # Create a connection to the DB, set the new values, close the connection
            conn = bdp.BDDbConnector_Capabilities()
            conn.set_DataType_Capabilities_JSON(current_DataTypeNumber,
                                                theJSONString)
            conn.close()

            print("-API Datatype Capabilities for datatype number: " +
                  str(current_DataTypeNumber) + " written to local DB as: " +
                  str(theJSONString))

        except:
            print(
                "-WARNING: Data was ingested on this run AND there was an issue updating the API Capabilities local DB"
            )

    else:
        print("No Items found for year: " + str(yearForHDF))
        print(
            str(len(ingest_Error_List)) +
            " errors associated with ingest items.")

    print("")
    print("Output of per-item Error Log: " + str(ingest_Error_List))
    print("")
Example #9
def processYearByDirectory(dataType, year, inputdir, nlastdate):
    '''
    Ingest GEFS precipitation GeoTIFFs in inputdir that are dated after nlastdate.

    :param dataType: key into params.dataTypes
    :param year: year of the target HDF datastore
    :param inputdir: directory containing the incoming .tif files
    :param nlastdate: last date already ingested; only newer files are processed
    '''
    ### Process the incoming data
    print inputdir
    dataupdated = False
    dataStore = dataS.datastorage(dataType, year, forWriting=True)
    indexer = params.dataTypes[dataType]['indexer']
    for filename in os.listdir(inputdir):
        # File names follow <prefix>.YYYY.MMDD..., e.g. name.2019.0215.tif
        filesplit = filename.split('.')
        fyear = filesplit[1]
        fmonth = filesplit[2][:2]
        fday = filesplit[2][2:]
        fdatestring = fday + " " + fmonth + " " + fyear
        fdate = datetime.datetime.strptime(fdatestring, "%d %m %Y")
        if fdate > nlastdate:
            if filename.endswith(".tif") and os.stat(inputdir + "/" + filename).st_size > 0:
                dataupdated = True
                fileToProcess = inputdir + "/" + filename
                print "Processing " + fileToProcess
                directory, fileonly = os.path.split(fileToProcess)

                dictionary = dateutils.breakApartGEFSNewName(fileonly)  # name convention changed, update needed
                year = dictionary['year']
                month = dictionary['month']
                day = dictionary['day']
                sdate = "{0} {1} {2}".format(day, month, year)
                filedate = datetime.datetime.strptime(sdate, "%d %m %Y")
                ds = georead.openGeoTiff(fileToProcess)
                prj = ds.GetProjection()
                grid = ds.GetGeoTransform()
                img = georead.readBandFromFile(ds, 1)
                ds = None
                index = indexer.getIndexBasedOnDate(day, month, year)
                print "Index:", index
                try:
                    changed = False
                    with open('/data/data/cserv/www/html/json/stats.json', 'r+') as f:
                        data = json.load(f)
                        for item in data['items']:
                            if item['name'] == 'gefsprecip':
                                ldatestring = item['Latest']
                                ldate = datetime.datetime.strptime(ldatestring, "%d %m %Y")
                                if ldate < filedate:
                                    print("file date is later")
                                    item['Latest'] = sdate
                                    changed = True
                        if changed:
                            f.seek(0)  # reset file position to the beginning
                            json.dump(data, f, indent=4)
                            f.truncate()  # remove remaining part
                except Exception as e:
                    print(e)
                dataStore.putData(index, img)

    dataStore.close()
    if dataupdated:
        dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'], prj, grid, year)
def processDataStarting(yyyy, mm, dd):
    dataType = 0
    indexer = params.dataTypes[dataType]['indexer']
    inputdir = params.dataTypes[0][
        'inputDataLocation'] + yyyy  # will need to update this if year changes
    dataStore = dataS.datastorage(
        dataType, int(yyyy),
        forWriting=True)  # will need to update this if year changes
    ldatestring = dd + " " + mm + " " + yyyy
    ldate = datetime.datetime.strptime(ldatestring, "%d %m %Y")
    date = ldate + datetime.timedelta(days=1)

    while date.date() < datetime.datetime.now().date():
        fileToProcess = inputdir + "/chirp." + date.strftime(
            '%Y.%m.%d') + ".tif"

        if os.path.exists(
                fileToProcess) and os.path.getsize(fileToProcess) > 0:
            print "file exists, ingest started on: " + fileToProcess
            directory, fileonly = os.path.split(fileToProcess)
            dictionary = dateutils.breakApartChripsName(fileonly)
            year = dictionary['year']
            month = dictionary['month']
            day = dictionary['day']
            sdate = "{0} {1} {2}".format(day, month, year)
            filedate = datetime.datetime.strptime(sdate, "%d %m %Y")
            ds = georead.openGeoTiff(fileToProcess)
            prj = ds.GetProjection()
            grid = ds.GetGeoTransform()

            img = georead.readBandFromFile(ds, 1)
            ds = None
            index = indexer.getIndexBasedOnDate(date.day, date.month,
                                                date.year)
            print "Index:", index
            c = np.array(dataStore.getData(index))
            if (c == -9999).all():
                dataStore.putData(index, img)
                print date.strftime('%Y.%m.%d') + " data added to hdf"
                try:
                    changed = False
                    with open('/data/data/cserv/www/html/json/stats.json',
                              'r+') as f:
                        data = json.load(f)
                        for item in data['items']:
                            if (item['name'] == 'chirp'):
                                ldatestring = item['Latest']
                                ldate = date  #.strftime("%d %m %Y") #datetime.datetime.strptime(ldatestring, "%d %m %Y")
                                if ldate < filedate:
                                    item['Latest'] = sdate
                                    changed = True
                        if changed:
                            f.seek(
                                0
                            )  # <--- should reset file  position to the beginning.
                            json.dump(data, f, indent=4)
                            f.truncate()  # remove remaining part
                except Exception as e:
                    print(e)

            else:
                print date.strftime('%Y.%m.%d') + " data already in hdf"
        else:
            print "nothing to ingest "
        date = date + datetime.timedelta(days=1)

    dataStore.close()
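processDataStarting expects its date parts as zero-padded strings, since they are joined and parsed with "%d %m %Y". A hypothetical invocation, assuming the chirp inputs for 2020 are already on disk:

processDataStarting("2020", "06", "01")  # ingests daily chirp files dated 2020-06-02 onward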