def getYearValue(year, bounds, clippedmask, dataType, operationsType):
    '''
    :param year: year to aggregate over
    :param bounds: spatial bounds used when reading from the datastore
    :param clippedmask: boolean mask of pixels inside the clipping geometry
    :param dataType: datatype number (int)
    :param operationsType: statistical operation to apply
    '''
    logger.debug("getYearValue year=" + str(year) + " datatype=" + str(dataType))
    mathoper = pMath.mathOperations(operationsType, 12, params.dataTypes[dataType]['fillValue'], None)
    try:
        store = dStore.datastorage(dataType, year)
        indexer = params.dataTypes[dataType]['indexer']
        fillValue = params.getFillValue(dataType)
        indexes = indexer.getIndexesBasedOnDate(1, 1, year, 31, 12, year)
        for i in indexes:
            array = store.getData(i, bounds=bounds)
            # Only feed in pixels that hold real data and fall inside the clip mask.
            mask = np.where((array != fillValue) & (clippedmask == True))
            if np.size(mask) > 0:
                mathoper.addData(array[mask])
            del mask
            del array
        store.close()
        value = mathoper.getOutput()
        mathoper.cleanup()
        return value
    except:
        return mathoper.getFillValue()

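# ---------------------------------------------------------------------------
# Illustrative sketch (hypothetical helper, not called anywhere in this
# module): the core selection logic of getYearValue above, reduced to plain
# numpy.  Pixels equal to the fill value or outside the clip mask are
# excluded before the statistic is computed.
def _example_masked_mean():
    import numpy as np
    fill_value = -9999.0
    data = np.array([[1.0, 2.0, fill_value],
                     [4.0, fill_value, 6.0]])
    clipped_mask = np.array([[True, True, True],
                             [False, True, True]])
    mask = np.where((data != fill_value) & (clipped_mask == True))
    return data[mask].mean()  # mean over [1.0, 2.0, 6.0] -> 3.0
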
def processYearByDirectory(dataType, year, inputdir, ldate):
    '''
    :param dataType:
    :param year:
    :param inputdir:
    :param ldate: only files dated after this datetime are ingested
    '''
    ### Process the incoming data
    dataupdated = False
    dataStore = dataS.datastorage(dataType, year, forWriting=True)
    indexer = params.dataTypes[dataType]['indexer']
    for filename in os.listdir(inputdir):
        # Filenames follow the "<product>.<yyyy>.<mmdd>..." convention; pull the date out.
        filesplit = filename.split('.')
        fyear = filesplit[1]
        fmonth = filesplit[2][:2]
        fday = filesplit[2][2:]
        fdatestring = fday + " " + fmonth + " " + fyear
        fdate = datetime.datetime.strptime(fdatestring, "%d %m %Y")
        if fdate > ldate:
            if filename.endswith(".tif") and os.stat(inputdir + "/" + filename).st_size > 0:
                dataupdated = True
                fileToProcess = inputdir + "/" + filename
                print "Processing " + fileToProcess
                directory, fileonly = os.path.split(fileToProcess)
                dictionary = dateutils.breakApartGEFSNewName(fileonly)  # name convention changed, update needed
                year = dictionary['year']
                month = dictionary['month']
                day = dictionary['day']
                sdate = "{0} {1} {2}".format(day, month, year)
                filedate = datetime.datetime.strptime(sdate, "%d %m %Y")
                ds = georead.openGeoTiff(fileToProcess)
                prj = ds.GetProjection()
                grid = ds.GetGeoTransform()
                img = georead.readBandFromFile(ds, 1)
                ds = None
                index = indexer.getIndexBasedOnDate(day, month, year)
                print "Index:", index
                # Only write if the slot still holds fill values (not yet ingested).
                c = np.array(dataStore.getData(index))
                if (c == -9999).all() == True:
                    dataStore.putData(index, img)
                else:
                    print fdate.strftime('%Y.%m.%d') + " data already in hdf"
        else:
            print "file date is before the last ingested date"
    dataStore.close()
    if dataupdated:
        dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'], prj, grid, year)

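# Illustrative sketch (hypothetical): the filename date parse used above,
# assuming the "<product>.<yyyy>.<mmdd>.tif" naming convention.
def _example_parse_filename_date(filename="gefs.2020.0315.tif"):
    import datetime
    parts = filename.split('.')
    fyear, fmonth, fday = parts[1], parts[2][:2], parts[2][2:]
    return datetime.datetime.strptime(fday + " " + fmonth + " " + fyear, "%d %m %Y")
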
def getArrayForYearMonthDay(year, month, day, dataType):
    '''
    :param year:
    :param month:
    :param day:
    :param dataType:
    '''
    try:
        store = dStore.datastorage(dataType, year)
        indexer = params.dataTypes[dataType]['indexer']
        index = indexer.getIndexBasedOnDate(day, month, year)
        array = store.getData(index)
        # Close the store before returning; the original returned first,
        # which made the close (and a second return) unreachable.
        store.close()
        return array
    except:
        return []

def averageDecadalData(dataType, year, indexList):
    print "Averaging Decadal Data"
    dataStore = dataS.datastorage(dataType, year, forWriting=True)
    i = 0
    try:
        while i < len(indexList) - 1:
            if np.issubdtype(type(indexList[i]), np.integer) != True and np.issubdtype(
                    type(indexList[i + 2]), np.integer) != True:
                first = np.array(indexList[i])
                second = np.array(indexList[i + 2])
                # Average the two bracketing decades.  The original left both
                # candidate formulas commented out, leaving 'averaged'
                # undefined; the element-wise mean is restored here.
                averaged = (first + second) / 2
                print "adding averaged data to index: " + str(i + 1)
                dataStore.putData(i + 1, averaged)
            # Advance unconditionally; incrementing only inside the if-branch
            # (as originally written) loops forever when the test fails.
            i = i + 2
    except Exception as e:
        print("there was an exception: " + str(e))
    dataStore.close()

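# Illustrative sketch (hypothetical): element-wise averaging of two decadal
# grids.  np.nanmean over axis=0 (the other formula the original had
# commented out) is a gap-aware alternative when missing data is stored as NaN.
def _example_average_two_grids():
    import numpy as np
    first = np.array([[1.0, np.nan], [3.0, 4.0]])
    second = np.array([[3.0, 2.0], [5.0, np.nan]])
    return np.nanmean(np.array([first, second]), axis=0)  # [[2., 2.], [4., 4.]]
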
def processYearByDirectory(dataType, year, inputdir):
    '''
    :param dataType:
    :param year:
    :param inputdir:
    '''
    ### Process the incoming data
    prj = None
    dataStore = dataS.datastorage(dataType, year, forWriting=True)
    indexer = params.dataTypes[dataType]['indexer']
    for filename in os.listdir(inputdir):
        if filename.endswith(".tif"):
            fileToProcess = inputdir + "/" + filename
            print "Processing " + fileToProcess
            directory, fileonly = os.path.split(fileToProcess)
            dictionary = dateutils.breakApartSmapName(fileonly)
            year = int(dictionary['year'])
            day = int(dictionary['day'])
            month = int(dictionary['month'])
            ds = georead.openGeoTiff(fileToProcess)
            prj = ds.GetProjection()
            grid = ds.GetGeoTransform()
            img = georead.readBandFromFile(ds, 1)
            ds = None
            index = indexer.getIndexBasedOnDate(day, month, year)
            print "Index:", index
            dataStore.putData(index, img)
    dataStore.close()
    # Guard against an empty input directory, in which case the loop never
    # set prj/grid and the unconditional write would raise a NameError.
    if prj is not None:
        dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'], prj, grid, year)

def _get_DataStore(self, dataTypeNumber, yearValue, forWriting):
    retDataStore = dStore.datastorage(dataTypeNumber, yearValue, forWriting)
    return retDataStore

def processYearByDirectory(dataType, year, inputdir):
    '''
    :param dataType:
    :param year:
    :param inputdir:
    '''
    ### Process the incoming data
    global dataName
    if dataType == 29:
        dataName = 'esi4week'
    else:
        dataName = 'esi12week'
    filePattern = None
    with open('/data/data/cserv/www/html/json/stats.json', 'r+') as f:
        data = json.load(f)
        theDate = filter(getESIDate, data['items'])[0]['Latest']
        # Build the latest ingested date as <yyyy><ddd> (year plus zero-padded
        # day-of-year).  The format string must use %m (month); the original
        # used %M, which parses minutes.  Note the hardcoded 2019 anchor: this
        # only yields a true day-of-year for 2019 data.
        filePattern = theDate.split(' ')[2] + str("%03d" % (
            (datetime.datetime.strptime(theDate, '%d %m %Y') -
             datetime.datetime(2019, 1, 1)).days + 1, ))
    dataStore = dataS.datastorage(dataType, year, forWriting=True)
    indexer = params.dataTypes[dataType]['indexer']
    for filename in os.listdir(inputdir):
        if filename.endswith(".tif") and int(getDatePattern(filename)) > int(filePattern):
            fileToProcess = inputdir + "/" + filename
            print "Processing " + fileToProcess
            directory, fileonly = os.path.split(fileToProcess)
            dictionary = dateutils.breakApartEsiName(fileonly)
            year = dictionary['year']
            month = dictionary['month']
            day = dictionary['day']
            sdate = "{0} {1} {2}".format(day, month, year)
            filedate = datetime.datetime.strptime(sdate, "%d %m %Y")
            ds = georead.openGeoTiff(fileToProcess)
            prj = ds.GetProjection()
            grid = ds.GetGeoTransform()
            img = georead.readBandFromFile(ds, 1)
            ds = None
            index = indexer.getIndexBasedOnDate(day, month, year)
            print "Index:", index
            try:
                changed = False
                with open('/data/data/cserv/www/html/json/stats.json', 'r+') as f:
                    data = json.load(f)
                    for item in data['items']:
                        if item['name'] == dataName:
                            ldatestring = item['Latest']
                            ldate = datetime.datetime.strptime(ldatestring, "%d %m %Y")
                            if ldate < filedate:
                                print("file date is later")
                                item['Latest'] = sdate
                                changed = True
                    if changed:
                        f.seek(0)  # reset file position to the beginning
                        json.dump(data, f, indent=4)
                        f.truncate()  # remove any remaining tail of the old file
            except Exception as e:
                print(e)
                pass
            dataStore.putData(index, img)
    dataStore.close()

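# Illustrative sketch (hypothetical): the <yyyy><ddd> pattern built above,
# with the day-of-year computed via strftime('%j') instead of subtracting a
# hardcoded anchor date, so it works for any year.
def _example_date_pattern(datestring="15 06 2019"):
    import datetime
    d = datetime.datetime.strptime(datestring, "%d %m %Y")
    return d.strftime("%Y") + d.strftime("%j")  # -> "2019166"
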
def processYearByDirectory(dataType, year, inputdir):
    '''
    :param dataType:
    :param year:
    :param inputdir:
    '''
    ### Process the incoming data
    prj = None
    indexer = params.dataTypes[dataType]['indexer']
    for filename in os.listdir(inputdir):
        if filename.endswith(".tif"):
            try:
                dataStore = dataS.datastorage(dataType, year, forWriting=True)
                fileToProcess = inputdir + "/" + filename
                print "Processing " + fileToProcess
                directory, fileonly = os.path.split(fileToProcess)
                dictionary = dateutils.breakApartemodisNameAdjust(fileonly, 3)
                year = dictionary['year']
                month = dictionary['month']
                day = dictionary['day']
                sdate = "{0} {1} {2}".format(day, month, year)
                filedate = datetime.datetime.strptime(sdate, "%d %m %Y")
                ds = georead.openGeoTiff(fileToProcess)
                prj = ds.GetProjection()
                grid = ds.GetGeoTransform()
                img = georead.readBandFromFile(ds, 1)
                # Some rasters arrive one pixel wider/taller than the expected
                # grid; trim the extra column/row when present.
                try:
                    xSize = params.dataTypes[dataType]['size'][0]
                    img = np.delete(img, (xSize), axis=1)
                    print("Accounting for pixel width differences.")
                except:
                    pass
                try:
                    ySize = params.dataTypes[dataType]['size'][1]
                    img = np.delete(img, (ySize), axis=0)
                    print("Accounting for pixel height differences.")
                except:
                    pass
                ### Convert stretched byte values to NDVI, per the FEWS.NET data document:
                # eMODIS NDVI data are stretched (mapped) linearly to byte values:
                #   [-1.0, 1.0] -> [0, 200]; values 201-255 are invalid.
                #   NDVI = (value - 100) / 100; example: (150 - 100) / 100 = 0.5 NDVI
                validmask = np.where(img <= 200)
                invalidmask = np.where((img > 200) | (img < 100))
                img = img.astype(np.float32)
                img[validmask] = (img[validmask] - 100) / 100.
                img[invalidmask] = params.dataTypes[dataType]['fillValue']
                ds = None
                index = indexer.getIndexBasedOnDate(day, month, year)
                print month, "/", day, "/", year, "--Index->", index
                try:
                    changed = False
                    with open('/data/data/cserv/www/html/json/stats.json', 'r+') as f:
                        data = json.load(f)
                        for item in data['items']:
                            if item['name'] == 'casndvi':
                                ldatestring = item['Latest']
                                ldate = datetime.datetime.strptime(ldatestring, "%d %m %Y")
                                if ldate < filedate:
                                    print("file date is later")
                                    item['Latest'] = sdate
                                    changed = True
                        if changed:
                            f.seek(0)  # reset file position to the beginning
                            json.dump(data, f, indent=4)
                            f.truncate()  # remove any remaining tail of the old file
                except Exception as e:
                    print(e)
                    pass
                dataStore.putData(index, img)
                img = None
                dataStore.close()
            except:
                print 'Failed adding tif'
                img = None
                dataStore.close()
    if prj is not None:
        dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'], prj, grid, year)

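# Illustrative sketch (hypothetical): the byte-to-NDVI unstretch above on a
# tiny array.  Stretched bytes in [0, 200] map linearly to NDVI [-1.0, 1.0];
# this module additionally treats everything below 100 (negative NDVI) and
# above 200 as fill.
def _example_unstretch_ndvi():
    import numpy as np
    fill_value = -9999.0
    img = np.array([50, 100, 150, 200, 255], dtype=np.uint8)
    valid = np.where(img <= 200)
    invalid = np.where((img > 200) | (img < 100))
    out = img.astype(np.float32)
    out[valid] = (out[valid] - 100) / 100.
    out[invalid] = fill_value  # overwrites the <100 values converted above
    return out  # [-9999., 0., 0.5, 1., -9999.]
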
def ingest_IMERG(startYYYYMMDD, endYYYYMMDD):
    # Set the Datatype number
    current_DataTypeNumber = 34  # Hardcoded until there are more IMERG types in here..

    # Instance of the IMERG data classes
    IMERG_DataClass = IDC.IMERG_Data()

    # Convert to dates
    dateFormat = "%Y%m%d"
    start_Date = datetime.datetime.strptime(startYYYYMMDD, dateFormat)
    end_Date = datetime.datetime.strptime(endYYYYMMDD, dateFormat)

    # Build the list of expected datasets, one per day in the range.
    dataset_Obj_List = []
    delta = end_Date - start_Date
    for i in range(delta.days + 1):
        currentDate = start_Date + datetime.timedelta(days=i)
        tifFileName = IMERG_DataClass.get_Expected_Tif_FileName(currentDate.year, currentDate.month, currentDate.day)
        obj_To_Append = {
            "Tif_File_Name": tifFileName,
            "year": currentDate.year,
            "month": currentDate.month,
            "day": currentDate.day
        }
        dataset_Obj_List.append(obj_To_Append)

    # Folder where TIF and TFW files end up.
    input_Dataset_Folder = params.dataTypes[current_DataTypeNumber]['inputDataLocation']

    # Other vars needed for the loop
    itemsCounter = 0
    ingest_Error_List = []
    capabilities_DateFormatString = "%Y_%m_%d"
    last_YYYY_MM_DD_Processed = None

    # Ingest specific stuff
    yearForHDF = int(startYYYYMMDD[0:4])  # Year for the HDF file
    dataStore = dataS.datastorage(current_DataTypeNumber, yearForHDF, forWriting=True)
    indexer = params.dataTypes[current_DataTypeNumber]['indexer']

    # Do the actual ingest.
    for currentObj in dataset_Obj_List:
        try:
            # Try to ingest the file; record an error if one occurs.
            fileName = currentObj['Tif_File_Name']
            fileToProcess = os.path.join(input_Dataset_Folder, fileName)
            # The product run suffix varies (03E/04A/04B); fall through the
            # variants until a file is found.
            if not os.path.isfile(fileToProcess):
                fileToProcess = fileToProcess.replace("03E", "04A")
            if not os.path.isfile(fileToProcess):
                fileToProcess = fileToProcess.replace("04A", "04B")
            print("-Processing File: " + str(fileToProcess))

            theYear = yearForHDF  # currentObj['year']
            theMonth = currentObj['month']
            theDay = currentObj['day']

            # Open / read the file
            ds = georead.openGeoTiff_WithUpdateFlag(fileToProcess)

            # Set a projection and transform (the IMERG data does not come with one already..)
            ds.SetProjection(IMERG_DataClass.get_DefaultProjection_String())
            ds.SetGeoTransform(IMERG_DataClass.get_DefaultGeoTransform_Obj())

            # Get the values to save (just like in all the other ingest procedures).
            prj = ds.GetProjection()
            grid = ds.GetGeoTransform()

            # Index it.
            img = georead.readBandFromFile(ds, 1)
            ds = None
            index = indexer.getIndexBasedOnDate(theDay, theMonth, theYear)
            dataStore.putData(index, img)
            last_YYYY_MM_DD_Processed = str(theYear) + "_" + str("%02d" % theMonth) + "_" + str("%02d" % theDay)
            itemsCounter += 1
        except:
            # Record the error and move on to the next file.
            e = sys.exc_info()[0]
            errorStr = "-ERROR Ingesting File: " + str(fileName) + " System Error Message: " + str(e)
            print(str(errorStr))
            ingest_Error_List.append(errorStr)

    # Close and save the data
    dataStore.close()

    if itemsCounter > 0:
        dataS.writeSpatialInformation(params.dataTypes[current_DataTypeNumber]['directory'], prj, grid, yearForHDF)
        print("Finished processing, " + str(itemsCounter) + ", data items for year: " + str(yearForHDF))

        # Update the capabilities
        try:
            print("-TODO: Check existing capabilities and overwrite only some parts rather than overwriting with the last option. Right now the last item ingested has its date set as the END date for the capabilities range, so a reingest of a small subset in the middle of the data will surface this bug; budget about a day to fix it.")
            capabilities_Info = {
                "name": params.dataTypes[current_DataTypeNumber]['name'],
                "description": params.dataTypes[current_DataTypeNumber]['description'],
                "size": params.dataTypes[current_DataTypeNumber]['size'],
                "fillValue": params.dataTypes[current_DataTypeNumber]['fillValue'],
                "data_category": params.dataTypes[current_DataTypeNumber]['data_category'],
                "projection": prj,
                "grid": grid,
                # The start and end of the date range.
                "startDateTime": "2015_03_08",
                "endDateTime": last_YYYY_MM_DD_Processed,
                "date_FormatString_For_ForecastRange": capabilities_DateFormatString
            }
            # Write the capabilities info to the bddb
            theJSONString = json.dumps(capabilities_Info)
            # Create a connection to the DB, set the new values, close the connection
            conn = bdp.BDDbConnector_Capabilities()
            conn.set_DataType_Capabilities_JSON(current_DataTypeNumber, theJSONString)
            conn.close()
            print("-API Datatype Capabilities for datatype number: " + str(current_DataTypeNumber) + " written to local DB as: " + str(theJSONString))
        except:
            print("-WARNING: Data was ingested on this run AND there was an issue updating the API Capabilities local DB")
    else:
        print("No Items found for year: " + str(yearForHDF))

    print(str(len(ingest_Error_List)) + " errors associated with ingest items.")
    print("")
    print("Output of per-item Error Log: " + str(ingest_Error_List))
    print("")

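# Illustrative sketch (hypothetical): building the per-day expected-dataset
# list used by ingest_IMERG above, without the product-specific filename.
def _example_build_daily_list(start="20200301", end="20200303"):
    import datetime
    fmt = "%Y%m%d"
    start_date = datetime.datetime.strptime(start, fmt)
    end_date = datetime.datetime.strptime(end, fmt)
    items = []
    for i in range((end_date - start_date).days + 1):
        d = start_date + datetime.timedelta(days=i)
        items.append({"year": d.year, "month": d.month, "day": d.day})
    return items  # one entry per day, inclusive of both endpoints
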
def getDayValue(year, month, day, bounds, clippedmask, dataType, operationsType, polygon_Str_ToPass, uid):
    '''
    :param year:
    :param month:
    :param day:
    :param bounds:
    :param clippedmask:
    :param dataType: This is actually the datatype number (int)
    :param operationsType:
    :param polygon_Str_ToPass: GeoJSON string for the clipping geometry
    :param uid: unique ID of the whole job (used for the scratch folder)
    '''
    # Single item in one dimension; the index for the day assumes 31 days in every month.
    logger.debug("getDayValue year=" + str(year) + " month=" + str(month) + " day=" + str(day) + " datatype=" + str(dataType))

    # KS Refactor 2015 // This is where the code is intercepted to add the new
    # 'download' operation at the worker thread level.
    if params.parameters[operationsType][1] == 'download':
        onErrorReturnValue = 0  # 0 signals failure to the caller
        try:
            # Param checking (compared to the test controller function in HDFDataToFile)
            theDataTypeNumber = dataType
            size = params.getGridDimension(int(theDataTypeNumber))
            geotransform, wkt = rp.getSpatialReference(int(theDataTypeNumber))
            theBounds = bounds
            geometry = geoutils.decodeGeoJSON(polygon_Str_ToPass)
            theYear = year
            theMonth = month
            theDay = day

            # Worker section
            theStore = dStore.datastorage(theDataTypeNumber, theYear)
            theIndexer = params.dataTypes[theDataTypeNumber]['indexer']
            theFillValue = params.getFillValue(theDataTypeNumber)
            theIndex = theIndexer.getIndexBasedOnDate(theDay, theMonth, theYear)

            hdf_Data_Array = None
            try:
                hdf_Data_Array = theStore.getData(theIndex, bounds=theBounds)
            except:
                firstErrorMessage = str(sys.exc_info())
                logger.debug("DataCalculator: Download Job ERROR getting data from H5 to hdf_Data_Array: We are inside 2 try/except blocks. firstErrorMessage: " + str(firstErrorMessage) + ", trying a workaround before bailing out!")
                # Workaround for a buggy h5py bounds selection: read the data
                # in two halves split at the breakpoint, then stitch the
                # halves back together side by side.
                try:
                    breakPoint = 0
                    theBounds_Part1 = (theBounds[0], (breakPoint - 1), theBounds[2], theBounds[3])
                    theBounds_Part2 = (breakPoint, theBounds[1], theBounds[2], theBounds[3])
                    hdf_Data_Array_Part1 = theStore.getData(theIndex, bounds=theBounds_Part1)
                    hdf_Data_Array_Part2 = theStore.getData(theIndex, bounds=theBounds_Part2)
                    # Equivalent to the original element-by-element copy loops.
                    hdf_Data_Array = np.hstack((hdf_Data_Array_Part1, hdf_Data_Array_Part2)).astype(np.float32)
                except:
                    # If this error keeps happening, read HDFDataToFile line 138 for more detailed notes on this issue.
                    logger.debug("DataCalculator: Download Job ERROR getting data from H5 to hdf_Data_Array: We are inside 2 try/except blocks, and the second one failed.. firstErrorMessage: " + str(firstErrorMessage) + " System Error Message: " + str(sys.exc_info()))
                    return onErrorReturnValue

            # Points processing from the geometry value
            thePoints = geometry
            theLats = []
            theLongs = []
            for p in range(thePoints.GetPointCount()):
                theLats.append(thePoints.GetY(p))
                theLongs.append(thePoints.GetX(p))

            # Min longitude and max latitude give the top-left corner.
            minLong = min(theLongs)
            maxLat = max(theLats)

            # Earlier attempts to adjust negative values (+360 on longitude,
            # abs()+/-90 on latitude, and a half-pixel shift for the
            # 'bug 3 pixels off by half a degree') all misplaced the output;
            # the values now pass through unchanged.
            adjusted_Min_Long = minLong
            adjusted_Max_Lat = maxLat

            # Outfile transform x,y positions set using the adjusted min long and max lat
            outTransform_xPos = adjusted_Min_Long
            outTransform_yPos = adjusted_Max_Lat

            noData_Value = theFillValue
            bandName = 1
            fullDatset_GeoTransform = geotransform
            outFullGeoTransform = (outTransform_xPos, fullDatset_GeoTransform[1], fullDatset_GeoTransform[2],
                                   outTransform_yPos, fullDatset_GeoTransform[4], fullDatset_GeoTransform[5])
            fullDataset_Projection = wkt
            uniqueID = uid  # Entire job ID

            # Process the filename
            outFileName = extractTif.get_Tif_FileOutName(theDataTypeNumber, theYear, theMonth, theDay)
            outFileFolder = params.zipFile_ScratchWorkspace_Path + str(uid) + "/"
            outFileFullPath = outFileFolder + outFileName

            # Output file size
            out_X_Size = hdf_Data_Array.shape[1]
            out_Y_Size = hdf_Data_Array.shape[0]

            # Get the GDAL driver and create a blank output file
            theDriverFormat = "GTiff"
            theDriver = gdal.GetDriverByName(theDriverFormat)
            outDS = theDriver.Create(outFileFullPath, out_X_Size, out_Y_Size, 1, GDT_Float32)

            # Write the data array to the band, set the NoData value, and flush
            # the cache (this is the step that writes data to the output file).
            outDataArray = hdf_Data_Array
            outBand = outDS.GetRasterBand(bandName)
            outBand.WriteArray(outDataArray, 0, 0)
            outBand.SetNoDataValue(noData_Value)
            outBand.FlushCache()

            # Set the projection and transform, then close the dataset (very important!)
            outDS.SetGeoTransform(outFullGeoTransform)
            outDS.SetProjection(fullDataset_Projection)
            outDS = None

            # A tif file now exists in the zipfile scratch area (one per run);
            # return 1 to signal that this tif was created.
            return 1
        except:
            logger.debug("DataCalculator: Download Job ERROR: Not sure what went wrong... System Error Message: " + str(sys.exc_info()))
            return onErrorReturnValue
    else:
        # Normal statistical operations
        mathoper = pMath.mathOperations(operationsType, 1, params.dataTypes[dataType]['fillValue'], None)
        try:
            store = dStore.datastorage(dataType, year)
            indexer = params.dataTypes[dataType]['indexer']
            fillValue = params.getFillValue(dataType)
            index = indexer.getIndexBasedOnDate(day, month, year)

            # The same two-part read workaround used by the download branch above.
            array_H5Data = None
            try:
                array_H5Data = store.getData(index, bounds=bounds)
            except:
                firstErrorMessage = str(sys.exc_info())
                logger.debug("DataCalculator: Statistics Job ERROR getting data from H5 to array_H5Data: We are inside 2 try/except blocks. firstErrorMessage: " + str(firstErrorMessage) + ", trying a workaround before bailing out!")
                try:
                    theBounds = bounds
                    theStore = store
                    theIndex = index
                    breakPoint = 0
                    theBounds_Part1 = (theBounds[0], (breakPoint - 1), theBounds[2], theBounds[3])
                    theBounds_Part2 = (breakPoint, theBounds[1], theBounds[2], theBounds[3])
                    hdf_Data_Array_Part1 = theStore.getData(theIndex, bounds=theBounds_Part1)
                    hdf_Data_Array_Part2 = theStore.getData(theIndex, bounds=theBounds_Part2)
                    array_H5Data = np.hstack((hdf_Data_Array_Part1, hdf_Data_Array_Part2)).astype(np.float32)
                    logger.debug("DataCalculator stitchedData_Array has been built.")
                except:
                    logger.debug("DataCalculator: Statistics Job ERROR getting data from H5 to array_H5Data: We are inside 2 try/except blocks, and the second one failed.. The code will break shortly... firstErrorMessage: " + str(firstErrorMessage) + " System Error Message: " + str(sys.exc_info()))

            logger.debug("DataCalculator.getDayValue : Value of 'index': " + str(index))

            # Something in here breaks on climate datatypes found in the
            # southern hemisphere: the clipped mask can come back empty.
            mask = None
            try:
                mask = np.where((array_H5Data != fillValue) & (clippedmask == True))
                # If the mask is empty, raise and fall through to the
                # whole-array fallback below.
                if len(mask[0]) == 0:
                    logger.debug("DataCalculator.getDayValue : len(mask[0]) should NOT be 0. Raising the exception...")
                    raise Exception("empty clipped mask")
                mathoper.addData(array_H5Data[mask])
            except:
                logger.debug("DataCalculator Alert F.except.debug: Something went wrong with the normal process..")
                # Build a mask that selects every cell of the data array
                # (row-index array, column-index array); equivalent to the
                # original hand-rolled index loops.
                numOf_H5_Rows = array_H5Data.shape[0]
                numOf_H5_Cols = array_H5Data.shape[1]
                rowIndexes, colIndexes = np.indices((numOf_H5_Rows, numOf_H5_Cols))
                fakeMask = (rowIndexes.ravel(), colIndexes.ravel())
                mathoper.addData(array_H5Data[fakeMask])

            del mask
            del array_H5Data
            store.close()
            value = mathoper.getOutput()
            mathoper.cleanup()
            return value
        except:
            e = sys.exc_info()[0]
            logger.debug("DataCalculator.getDayValue : returning fill value.. 'mathoper.getFillValue()': " + str(mathoper.getFillValue()) + " System Error Message: " + str(e))
            return mathoper.getFillValue()

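# Illustrative sketch (hypothetical): the two-part read-and-stitch workaround
# and the full-array index mask from getDayValue above, in isolation.
def _example_stitch_and_full_mask():
    import numpy as np
    part1 = np.array([[1.0, 2.0], [5.0, 6.0]])
    part2 = np.array([[3.0, 4.0], [7.0, 8.0]])
    stitched = np.hstack((part1, part2))  # shape (2, 4): halves joined side by side
    rows, cols = np.indices(stitched.shape)
    full_mask = (rows.ravel(), cols.ravel())  # (row indexes, column indexes) for every cell
    return stitched[full_mask]  # [1. 2. 3. 4. 5. 6. 7. 8.]
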
def ingestSubProcess_Year(current_DataTypeNumber, year):
    itemsCounter = 0
    inputYear = str(year)
    processedFileNames = []
    skippedFileNames = []
    prj = None
    grid = None
    dataStore = dataS.datastorage(current_DataTypeNumber, year, forWriting=True)
    indexer = params.dataTypes[current_DataTypeNumber]['indexer']
    inputdir = params.dataTypes[current_DataTypeNumber]['inputDataLocation']
    print("inputdir: " + inputdir)

    # Iterate through each file and do the processing
    for filename in os.listdir(inputdir):
        if filename.endswith(".tif"):
            fileToProcess = os.path.join(inputdir, filename)
            directory, fileonly = os.path.split(fileToProcess)
            # Get the year, month and day the file represents
            dictionary = get_YearMonthDay_Obj_From_ClimateChange_FileName(fileonly)  # dateutils.breakApartChripsName(fileonly)
            # We only want items for the current year
            compareYear = str(dictionary['year'])
            if compareYear == inputYear:
                year = dictionary['year']
                month = dictionary['month']
                day = dictionary['day']
                # Open / read the file
                ds = georead.openGeoTiff(fileToProcess)
                prj = ds.GetProjection()
                grid = ds.GetGeoTransform()
                # Index it.
                img = georead.readBandFromFile(ds, 1)
                ds = None
                index = indexer.getIndexBasedOnDate(day, month, year)
                dataStore.putData(index, img)
                processedFileNames.append(fileonly)
                itemsCounter += 1
            else:
                skippedFileNames.append(fileonly)

    # Close and save the data
    dataStore.close()
    if itemsCounter > 0:
        try:
            dataS.writeSpatialInformation(params.dataTypes[current_DataTypeNumber]['directory'], prj, grid, year)
        except Exception as e:
            print("Here's the error: " + str(e))
    print("Finished processing, " + str(itemsCounter) + ", data items for year: " + str(year))
    # The caller needs the projection and grid strings for the capabilities output.
    retObject = {"projection": prj, "grid": grid}
    return retObject

def processYearByDirectory(dataType, year, inputdir):
    '''
    :param dataType:
    :param year:
    :param inputdir:
    '''
    ### Process the incoming data
    prj = None
    dataStore = dataS.datastorage(dataType, year, forWriting=True)
    indexer = params.dataTypes[dataType]['indexer']
    for filename in os.listdir(inputdir):
        if filename.endswith(".tif") and "chirps" in filename:
            fileToProcess = inputdir + "/" + filename
            print "Processing " + fileToProcess
            directory, fileonly = os.path.split(fileToProcess)
            dictionary = dateutils.breakApartChripsName(fileonly)
            year = dictionary['year']
            month = dictionary['month']
            day = dictionary['day']
            sdate = "{0} {1} {2}".format(day, month, year)
            filedate = datetime.datetime.strptime(sdate, "%d %m %Y")
            ds = georead.openGeoTiff(fileToProcess)
            prj = ds.GetProjection()
            grid = ds.GetGeoTransform()
            time.sleep(1)
            img = georead.readBandFromFile(ds, 1)
            index = indexer.getIndexBasedOnDate(day, month, year)
            print "Index:", index
            try:
                changed = False
                with open('/data/data/cserv/www/html/json/stats.json', 'r+') as f:
                    data = json.load(f)
                    for item in data['items']:
                        if item['name'] == 'chirps':
                            ldatestring = item['Latest']
                            ldate = datetime.datetime.strptime(ldatestring, "%d %m %Y")
                            if ldate < filedate:
                                item['Latest'] = sdate
                                changed = True
                    if changed:
                        f.seek(0)  # reset file position to the beginning
                        json.dump(data, f, indent=4)
                        f.truncate()  # remove any remaining tail of the old file
            except Exception as e:
                # str(e) is required here; concatenating the exception object
                # itself (as originally written) raises a TypeError.
                print("******************" + str(e) + "****************************")
                pass
            time.sleep(1)
            dataStore.putData(index, img)
            time.sleep(1)
            ds = None
    dataStore.close()
    if prj is not None:
        dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'], prj, grid, year)

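# Illustrative sketch (hypothetical path and data): the read-modify-write
# pattern used on stats.json throughout this module.  seek(0) rewinds before
# dumping and truncate() drops any leftover tail when the new JSON is shorter
# than the old content.
def _example_update_json_in_place(path="/tmp/example_stats.json"):
    import json
    with open(path, 'w') as f:
        json.dump({"items": [{"name": "chirps", "Latest": "01 01 2020"}]}, f)
    with open(path, 'r+') as f:
        data = json.load(f)
        for item in data['items']:
            if item['name'] == 'chirps':
                item['Latest'] = "02 01 2020"
        f.seek(0)        # rewind to the beginning
        json.dump(data, f, indent=4)
        f.truncate()     # remove any remaining tail of the old content
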
def ingest_CHIRPSMonthly(startYYYYMM, endYYYYMM):
    # Set the Datatype number
    current_DataTypeNumber = 28  # Hardcoded until there is a better way to get this information (maybe params DB?)

    # Convert to dates
    dateFormat = "%Y%m"
    start_Date = datetime.datetime.strptime(startYYYYMM, dateFormat)
    end_Date = datetime.datetime.strptime(endYYYYMM, dateFormat)

    # Build the list of expected datasets, one per month in the range
    # (end_Date is pushed out one month so the while loop below includes it).
    dataset_Obj_List = []
    end_Date = add_months(end_Date, 1)
    tempDate = start_Date
    while (end_Date - tempDate).days > 0:
        currentDate = tempDate
        # Expected filename; something like this should be part of a data-classes object.
        tifFileName = "chirps-v2.0." + str(currentDate.year) + "." + str("%02d" % currentDate.month) + ".tif"
        obj_To_Append = {
            "Tif_File_Name": tifFileName,
            "year": currentDate.year,
            "month": currentDate.month,
            "day": currentDate.day
        }
        dataset_Obj_List.append(obj_To_Append)
        # Increment and set the new temp value for the while loop
        tempDate = add_months(tempDate, 1)

    # Folder where TIF files end up after download.
    input_Dataset_Folder = params.dataTypes[current_DataTypeNumber]['inputDataLocation']

    # Other vars needed for the loop
    itemsCounter = 0
    ingest_Error_List = []
    capabilities_DateFormatString = "%Y_%m"
    last_YYYY_MM_Processed = None

    # Ingest specific stuff
    yearForHDF = int(startYYYYMM[0:4])  # Year for the HDF file
    dataStore = dataS.datastorage(current_DataTypeNumber, yearForHDF, forWriting=True)
    indexer = params.dataTypes[current_DataTypeNumber]['indexer']

    # Do the actual ingest.
    for currentObj in dataset_Obj_List:
        try:
            # Try to ingest the file; record an error if one occurs.
            fileName = currentObj['Tif_File_Name']
            fileToProcess = os.path.join(input_Dataset_Folder, fileName)
            print("-Processing File: " + str(fileToProcess))

            theYear = yearForHDF  # currentObj['year']
            theMonth = currentObj['month']
            theDay = 1  # Monthly datasets use the first day of each month.

            # Open / read the file.  The original slept for an undefined 't'
            # seconds here (a NameError); a fixed one-second pause is assumed.
            ds = georead.openGeoTiff(fileToProcess)
            time.sleep(1)

            # If the dataset format does not come with a correct projection
            # and transform, this is where to override them.

            # Get the values to save (just like in all the other ingest procedures).
            prj = ds.GetProjection()
            grid = ds.GetGeoTransform()

            # Index it.
            img = georead.readBandFromFile(ds, 1)
            ds = None
            index = indexer.getIndexBasedOnDate(theDay, theMonth, theYear)
            dataStore.putData(index, img)
            last_YYYY_MM_Processed = str(theYear) + "_" + str("%02d" % theMonth)
            itemsCounter += 1
        except:
            # Record the error and move on to the next file.
            e = sys.exc_info()[0]
            errorStr = "-ERROR Ingesting File: " + str(fileName) + " System Error Message: " + str(e)
            print(str(errorStr))
            ingest_Error_List.append(errorStr)

    # Close and save the data
    dataStore.close()

    if itemsCounter > 0:
        dataS.writeSpatialInformation(params.dataTypes[current_DataTypeNumber]['directory'], prj, grid, yearForHDF)
        print("Finished processing, " + str(itemsCounter) + ", data items for year: " + str(yearForHDF))

        # Update the capabilities
        try:
            print("-TODO: Check existing capabilities and overwrite only some parts rather than overwriting with the last option. Right now the last item ingested has its date set as the END date for the capabilities range, so a reingest of a small subset in the middle of the data will surface this bug; budget about a day to fix it.")
            capabilities_Info = {
                "name": params.dataTypes[current_DataTypeNumber]['name'],
                "description": params.dataTypes[current_DataTypeNumber]['description'],
                "size": params.dataTypes[current_DataTypeNumber]['size'],
                "fillValue": params.dataTypes[current_DataTypeNumber]['fillValue'],
                "data_category": params.dataTypes[current_DataTypeNumber]['data_category'],
                "projection": prj,
                "grid": grid,
                # The start and end of the date range.
                "startDateTime": "1985_01",
                "endDateTime": last_YYYY_MM_Processed,
                "date_FormatString_For_ForecastRange": capabilities_DateFormatString
            }
            # Write the capabilities info to the bddb
            theJSONString = json.dumps(capabilities_Info)
            # Create a connection to the DB, set the new values, close the connection
            conn = bdp.BDDbConnector_Capabilities()
            conn.set_DataType_Capabilities_JSON(current_DataTypeNumber, theJSONString)
            conn.close()
            print("-API Datatype Capabilities for datatype number: " + str(current_DataTypeNumber) + " written to local DB as: " + str(theJSONString))
        except:
            print("-WARNING: Data was ingested on this run AND there was an issue updating the API Capabilities local DB")
    else:
        print("No Items found for year: " + str(yearForHDF))

    print(str(len(ingest_Error_List)) + " errors associated with ingest items.")
    print("")
    print("Output of per-item Error Log: " + str(ingest_Error_List))
    print("")

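# Illustrative sketch (hypothetical): a minimal add_months, assuming the real
# one lives elsewhere in this codebase, plus the month-by-month walk used by
# ingest_CHIRPSMonthly above.
def _example_iterate_months(start="202001", end="202003"):
    import datetime
    import calendar

    def add_months(sourcedate, months):
        month = sourcedate.month - 1 + months
        year = sourcedate.year + month // 12
        month = month % 12 + 1
        day = min(sourcedate.day, calendar.monthrange(year, month)[1])
        return datetime.datetime(year, month, day)

    fmt = "%Y%m"
    temp = datetime.datetime.strptime(start, fmt)
    stop = add_months(datetime.datetime.strptime(end, fmt), 1)  # make the range inclusive
    months = []
    while (stop - temp).days > 0:
        months.append((temp.year, temp.month))
        temp = add_months(temp, 1)
    return months  # [(2020, 1), (2020, 2), (2020, 3)]
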
def processYearByDirectory(dataType, year, inputdir, nlastdate):
    '''
    :param dataType:
    :param year:
    :param inputdir:
    :param nlastdate: only files dated after this datetime are ingested
    '''
    ### Process the incoming data
    print inputdir
    dataupdated = False
    dataStore = dataS.datastorage(dataType, year, forWriting=True)
    indexer = params.dataTypes[dataType]['indexer']
    for filename in os.listdir(inputdir):
        # Filenames follow the "<product>.<yyyy>.<mmdd>..." convention; pull the date out.
        filesplit = filename.split('.')
        fyear = filesplit[1]
        fmonth = filesplit[2][:2]
        fday = filesplit[2][2:]
        fdatestring = fday + " " + fmonth + " " + fyear
        fdate = datetime.datetime.strptime(fdatestring, "%d %m %Y")
        if fdate > nlastdate:
            if filename.endswith(".tif") and os.stat(inputdir + "/" + filename).st_size > 0:
                dataupdated = True
                fileToProcess = inputdir + "/" + filename
                print "Processing " + fileToProcess
                directory, fileonly = os.path.split(fileToProcess)
                dictionary = dateutils.breakApartGEFSNewName(fileonly)  # name convention changed, update needed
                year = dictionary['year']
                month = dictionary['month']
                day = dictionary['day']
                sdate = "{0} {1} {2}".format(day, month, year)
                filedate = datetime.datetime.strptime(sdate, "%d %m %Y")
                ds = georead.openGeoTiff(fileToProcess)
                prj = ds.GetProjection()
                grid = ds.GetGeoTransform()
                img = georead.readBandFromFile(ds, 1)
                ds = None
                index = indexer.getIndexBasedOnDate(day, month, year)
                print "Index:", index
                try:
                    changed = False
                    with open('/data/data/cserv/www/html/json/stats.json', 'r+') as f:
                        data = json.load(f)
                        for item in data['items']:
                            if item['name'] == 'gefsprecip':
                                ldatestring = item['Latest']
                                ldate = datetime.datetime.strptime(ldatestring, "%d %m %Y")
                                if ldate < filedate:
                                    print("file date is later")
                                    item['Latest'] = sdate
                                    changed = True
                        if changed:
                            f.seek(0)  # reset file position to the beginning
                            json.dump(data, f, indent=4)
                            f.truncate()  # remove any remaining tail of the old file
                except Exception as e:
                    print(e)
                    pass
                dataStore.putData(index, img)
    dataStore.close()
    if dataupdated:
        dataS.writeSpatialInformation(params.dataTypes[dataType]['directory'], prj, grid, year)

def processDataStarting(yyyy, mm, dd):
    dataType = 0
    indexer = params.dataTypes[dataType]['indexer']
    inputdir = params.dataTypes[0]['inputDataLocation'] + yyyy  # will need to update this if year changes
    dataStore = dataS.datastorage(dataType, int(yyyy), forWriting=True)  # will need to update this if year changes
    ldatestring = dd + " " + mm + " " + yyyy
    ldate = datetime.datetime.strptime(ldatestring, "%d %m %Y")
    date = ldate + datetime.timedelta(days=1)
    # Walk forward one day at a time until today, ingesting any file that exists.
    while date.date() < datetime.datetime.now().date():
        fileToProcess = inputdir + "/chirp." + date.strftime('%Y.%m.%d') + ".tif"
        if os.path.exists(fileToProcess) and os.path.getsize(fileToProcess) > 0:
            print "file exists, ingest started on: " + fileToProcess
            directory, fileonly = os.path.split(fileToProcess)
            dictionary = dateutils.breakApartChripsName(fileonly)
            year = dictionary['year']
            month = dictionary['month']
            day = dictionary['day']
            sdate = "{0} {1} {2}".format(day, month, year)
            filedate = datetime.datetime.strptime(sdate, "%d %m %Y")
            ds = georead.openGeoTiff(fileToProcess)
            prj = ds.GetProjection()
            grid = ds.GetGeoTransform()
            img = georead.readBandFromFile(ds, 1)
            ds = None
            index = indexer.getIndexBasedOnDate(date.day, date.month, date.year)
            print "Index:", index
            # Only write if the slot still holds fill values (not yet ingested).
            c = np.array(dataStore.getData(index))
            if (c == -9999).all() == True:
                dataStore.putData(index, img)
                print date.strftime('%Y.%m.%d') + " data added to hdf"
                try:
                    changed = False
                    with open('/data/data/cserv/www/html/json/stats.json', 'r+') as f:
                        data = json.load(f)
                        for item in data['items']:
                            if item['name'] == 'chirp':
                                # Compare against the current loop date rather
                                # than the stored 'Latest' string.
                                ldate = date
                                if ldate < filedate:
                                    item['Latest'] = sdate
                                    changed = True
                        if changed:
                            f.seek(0)  # reset file position to the beginning
                            json.dump(data, f, indent=4)
                            f.truncate()  # remove any remaining tail of the old file
                except Exception as e:
                    print(e)
                    pass
            else:
                print date.strftime('%Y.%m.%d') + " data already in hdf"
        else:
            print "nothing to ingest"
        date = date + datetime.timedelta(days=1)
    dataStore.close()

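# Illustrative sketch (hypothetical): the "already ingested?" test above.  A
# slot in the HDF store is considered empty when every pixel still equals the
# -9999 fill value.
def _example_slot_is_empty():
    import numpy as np
    empty_slot = np.full((2, 3), -9999)
    written_slot = np.array([[-9999, 5, -9999], [1, 2, 3]])
    return (empty_slot == -9999).all(), (written_slot == -9999).all()  # (True, False)
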