Example No. 1
def ingest_IMERG(startYYYYMMDD, endYYYYMMDD):

    # Set the Datatype number
    current_DataTypeNumber = 34  # Hardcoded until there are more IMERG types in here..
    
    # Instance of Imerg Data Classes
    IMERG_DataClass = IDC.IMERG_Data()
    
    # Convert to dates
    dateFormat = "%Y%m%d"
    start_Date = datetime.datetime.strptime(startYYYYMMDD, dateFormat)
    end_Date = datetime.datetime.strptime(endYYYYMMDD, dateFormat)
    
    # Build expected string list
    dataset_Obj_List = []
    #expected_Tif_FileNames = [] # 
    
    # iterate through all dates
    delta = end_Date - start_Date
    for i in range(delta.days + 1):
        #print start_Date + datetime.timedelta(days=i)
        currentDate = start_Date + datetime.timedelta(days=i)
        tifFileName = IMERG_DataClass.get_Expected_Tif_FileName(currentDate.year, currentDate.month, currentDate.day)
        #expected_Tif_FileNames.append(tifFileName)
        obj_To_Append = {
               "Tif_File_Name":tifFileName,
               "year":currentDate.year,
               "month":currentDate.month,
               "day":currentDate.day
               }
        dataset_Obj_List.append(obj_To_Append)
    
    # Get the expected file names.
    
    # Folder where TIF and TFW files end up.
    input_Dataset_Folder = params.dataTypes[current_DataTypeNumber]['inputDataLocation']

    # Other vars needed for the loop
    itemsCounter = 0
    ingest_Error_List = []
    capabilities_DateFormatString = "%Y_%m_%d"
    last_YYYY_MM_DD_Processed = None
    
    # Ingest specific stuff
    yearForHDF = int(startYYYYMMDD[0:4])  # Year for HDF File
    dataStore = dataS.datastorage(current_DataTypeNumber, yearForHDF, forWriting=True)
    indexer = params.dataTypes[current_DataTypeNumber]['indexer']
    
    # Do the actual ingest.
    #for fileName in expected_Tif_FileNames:
    for currentObj in dataset_Obj_List:
        
        try:
            # Try to ingest the file, record error if there is an error
            
            # open the file
            fileName = currentObj['Tif_File_Name']
            fileToProcess = os.path.join(input_Dataset_Folder, fileName)
            print(fileToProcess)

            # The file on disk may carry a different IMERG version tag than expected
            # (03E / 04A / 04B), so fall back through the known variants.
            if not os.path.isfile(fileToProcess):
                fileToProcess = fileToProcess.replace("03E", "04A")
                if not os.path.isfile(fileToProcess):
                    fileToProcess = fileToProcess.replace("04A", "04B")

            print("-Processing File: " + str(fileToProcess))
            
            # Note: an earlier version opened the TFW file with GDAL instead of the TIF
            # (see the commented-out georead call below); the TIF is opened directly now.
            fileToProcess_TFW = IMERG_DataClass.convert_TIF_FileName_To_TFW_Filename(fileToProcess)

            theYear = yearForHDF #currentObj['year']
            theMonth = currentObj['month']
            theDay = currentObj['day']
            print("before geotiff")
            # Open / Read the file
            #ds = georead.openGeoTiff(fileToProcess_TFW)
            ds = georead.openGeoTiff_WithUpdateFlag(fileToProcess)
            print("after geotiff")

            # Set a new projection (since the IMERG data does not come with one already..)
            ds.SetProjection(IMERG_DataClass.get_DefaultProjection_String())
            ds.SetGeoTransform(IMERG_DataClass.get_DefaultGeoTransform_Obj())
            
            # Get the values to save (just like in the other ingest procedures).
            prj = ds.GetProjection()
            grid = ds.GetGeoTransform()
                
            # Index it.
            img = georead.readBandFromFile(ds, 1)
            print(img)
            ds = None
            index = indexer.getIndexBasedOnDate(theDay, theMonth, theYear)
            #print "Index:",index
            dataStore.putData(index, img)
            last_YYYY_MM_DD_Processed = str(theYear)+ "_" + str("%02d" % theMonth) + "_" + str("%02d" % theDay)
            itemsCounter += 1
            
            
            
        except Exception as e:
            # Record the error for this file and continue with the next one
            errorStr = "-ERROR Ingesting File: " + str(fileName) + " System Error Message: " + str(e)
            print(str(errorStr))
            ingest_Error_List.append(errorStr)
        
    # Close and save the data
    dataStore.close()
    
    if itemsCounter > 0:
        dataS.writeSpatialInformation(params.dataTypes[current_DataTypeNumber]['directory'],prj,grid,yearForHDF)

        #print("Debug: processedFileNames: " + str(processedFileNames))
        #print("Debug: skippedFileNames: " + str(skippedFileNames))
        print("Finished processing, " + str(itemsCounter) + ", data items for year: " + str(yearForHDF))
    
        # need the projection and grid strings for the capabilities output.
        #retObject = {"projection":prj,"grid":grid}
    
        #return retObject
    
        # Update the capabilities
        try:
            print("-TODO, Check existing capabilities and overwrite only some parts rather than just overwriting with the last option... this was a shortcut taken to meet an expectation, budget about a day or so to fix this... right now, the last item ingested has it's date set as the 'END Date' for the capabilities range, (so if we are doing a simple reingest for a small subset in the middle of the data somewhere, this bug will show up..)")
            
            
            capabilities_Info = {
                         "name":params.dataTypes[current_DataTypeNumber]['name'],
                         "description":params.dataTypes[current_DataTypeNumber]['description'],
                         "size":params.dataTypes[current_DataTypeNumber]['size'],
                         "fillValue":params.dataTypes[current_DataTypeNumber]['fillValue'],
                         "data_category":params.dataTypes[current_DataTypeNumber]['data_category'],
                         "projection":prj,
                         "grid":grid,
                         
                        # Get the start and end Date range.
                         "startDateTime":"2015_03_08",
                         "endDateTime":last_YYYY_MM_DD_Processed,
                         "date_FormatString_For_ForecastRange":capabilities_DateFormatString
                         
                        # Other items to save?
                         
                         }
    
            # Write the capabilities info to the bddb
            theJSONString = json.dumps(capabilities_Info)
            # Create a connection to the DB, set the new values, close the connection
            conn = bdp.BDDbConnector_Capabilities()
            conn.set_DataType_Capabilities_JSON(current_DataTypeNumber, theJSONString)
            conn.close()
            
            print("-API Datatype Capabilities for datatype number: " +str(current_DataTypeNumber) + " written to local DB as: " + str(theJSONString))
            
        except Exception as e:
            print("-WARNING: Data was ingested on this run AND there was an issue updating the API Capabilities local DB: " + str(e))
    
    else:
        print("No Items found for year: " + str(yearForHDF))
        print(str(len(ingest_Error_List)) + " errors associated with ingest items.")
        
    print("")
    print("Output of per-item Error Log: " + str(ingest_Error_List))
    print("")
def download_and_Extract_IMERG_2015_Dataset():
    # Parse Start and End date ranges from strings
    #startYear = startYYYYMMDD[0:4]
    #startMonth = startYYYYMMDD[4:6]
    #startDay = startYYYYMMDD[6:8]

    #endYear = endYYYYMMDD[0:4]
    #endMonth = endYYYYMMDD[4:6]
    #endDay = endYYYYMMDD[6:8]

    # Set the Datatype number
    current_DataTypeNumber = 26  # Hardcoded until there are more IMERG types in here..

    # Instance of Imerg Data Classes
    IMERG_DataClass = IDC.IMERG_Data()

    # Hard coding because this is a one off dataset
    startYYYYMMDD = "20150307"
    endYYYYMMDD = "20151231"

    # Convert to dates
    dateFormat = "%Y%m%d"
    start_Date = datetime.datetime.strptime(startYYYYMMDD, dateFormat)
    end_Date = datetime.datetime.strptime(endYYYYMMDD, dateFormat)

    # Build expected string list
    #expected_FTP_FilePaths_TIF = []
    #expected_FTP_FilePaths_TFW = []
    expected_FTP_FilePaths = []  # each item holds "ftpPathTo_tif" and "ftpPathTo_tfw"

    # iterate through all dates
    delta = end_Date - start_Date
    for i in range(delta.days + 1):
        #print start_Date + datetime.timedelta(days=i)
        currentDate = start_Date + datetime.timedelta(days=i)
        #tifPath = IMERG_DataClass.get_Expected_FTP_FilePath_To_Tif(currentDate.year, currentDate.month, currentDate.day)
        #tfwPath = IMERG_DataClass.get_Expected_FTP_FilePath_To_Tfw(currentDate.year, currentDate.month, currentDate.day)
        tifPath = IMERG_DataClass.get_Expected_FTP_FilePath_To_Tif_2015Dataset(
            currentDate.year, currentDate.month, currentDate.day)
        tfwPath = IMERG_DataClass.get_Expected_FTP_FilePath_To_Tfw_2015Dataset(
            currentDate.year, currentDate.month, currentDate.day)

        objToAdd = {"ftpPathTo_tif": tifPath, "ftpPathTo_tfw": tfwPath}
        expected_FTP_FilePaths.append(objToAdd)
        #expected_FTP_FilePaths_TIF.append(tifPath)
        #expected_FTP_FilePaths_TFW.append(tfwPath)

    # Folder Stuff
    # Create the destination folder if it does not exist
    dataDestinationFolder = params.dataTypes[current_DataTypeNumber][
        'inputDataLocation']
    print("-Data Destination Folder (Downloading To) : " +
          str(dataDestinationFolder))
    testFolderPath = os.path.dirname(dataDestinationFolder)
    if not os.path.exists(testFolderPath):
        os.makedirs(testFolderPath)
        print("-Created a new folder at path: " + str(testFolderPath))

    # Connect to the FTP Server and download all of the files in the list.
    ftp_Connection = None
    try:
        ftp_Connection = ftplib.FTP(IMERG_DataClass.FTP_Host,
                                    IMERG_DataClass.FTP_UserName,
                                    IMERG_DataClass.FTP_UserPass)
        time.sleep(1)
    except:
        e = sys.exc_info()[0]
        print(
            "-ERROR Connecting to FTP.. bailing out..., System Error Message: "
            + str(e))
        return

    print(
        "-Downloading, extracting and removing temp files... this may take a few minutes...."
    )
    downloadCounter = 0
    # Iterate through all of our expected file paths
    for ftpFullFilePaths in expected_FTP_FilePaths:

        isError = False
        errorLog = []

        # print progress
        if (downloadCounter % 10 == 0):
            print("-Downloaded (extracted and removed temp zipfiles): " +
                  str(downloadCounter) + " rasters so far..")
        # Get the file names
        filenameOnly_Tif = ftpFullFilePaths['ftpPathTo_tif'].split('/')[-1]
        filenameOnly_Tfw = ftpFullFilePaths['ftpPathTo_tfw'].split('/')[-1]

        # Make local filenames
        local_FullFilePath_ToSave_Tif = os.path.join(dataDestinationFolder,
                                                     filenameOnly_Tif)
        local_FullFilePath_ToSave_Tfw = os.path.join(dataDestinationFolder,
                                                     filenameOnly_Tfw)

        # Unzipped filenames
        local_FullFilePath_ToExtract_Tif = local_FullFilePath_ToSave_Tif[:-3]  # Removes the '.gz' part of the filename
        local_FullFilePath_ToExtract_Tfw = local_FullFilePath_ToSave_Tfw[:-3]  # Removes the '.gz' part of the filename

        # Get directoryPath and Filename for FTP Server
        ftp_PathTo_TIF = IMERG_DataClass._get_FTP_FolderPath_From_FullFilePath(
            ftpFullFilePaths['ftpPathTo_tif'])
        ftp_PathTo_TFW = IMERG_DataClass._get_FTP_FolderPath_From_FullFilePath(
            ftpFullFilePaths['ftpPathTo_tfw'])

        # Download the Tif
        try:
            with open(local_FullFilePath_ToSave_Tif, "wb") as f:
                ftp_Connection.retrbinary("RETR " + ftp_PathTo_TIF, f.write)

        except:
            errorStr = "-ERROR Downloading TIF file: " + ftp_PathTo_TIF
            print(errorStr)
            errorLog.append(errorStr)
            isError = True

        # Give the FTP Connection a short break (Server spam protection mitigation)
        time.sleep(1)

        # Download the Tfw
        try:
            with open(local_FullFilePath_ToSave_Tfw, "wb") as f:
                ftp_Connection.retrbinary("RETR " + ftp_PathTo_TFW, f.write)
        except:
            errorStr = "-ERROR Downloading TFW file: " + ftp_PathTo_TFW
            print(errorStr)
            errorLog.append(errorStr)
            isError = True

        # Give the FTP Connection a short break (Server spam protection mitigation)
        time.sleep(1)

        # Extract the Tif file
        try:
            with gzip.open(local_FullFilePath_ToSave_Tif, 'rb') as inF, \
                 open(local_FullFilePath_ToExtract_Tif, 'wb') as outF:
                outF.write(inF.read())
        except:
            errorStr = "-ERROR Extracting the TIF file: " + local_FullFilePath_ToSave_Tif
            print(errorStr)
            errorLog.append(errorStr)
            isError = True

        # Extract the Tfw file
        try:
            with gzip.open(local_FullFilePath_ToSave_Tfw, 'rb') as inF, \
                 open(local_FullFilePath_ToExtract_Tfw, 'wb') as outF:
                outF.write(inF.read())
        except:
            errorStr = "-ERROR Extracting the TFW file: " + local_FullFilePath_ToSave_Tfw
            print(errorStr)
            errorLog.append(errorStr)
            isError = True

        # Remove the temporary TIF.gz file (the .gz files)
        try:
            os.remove(local_FullFilePath_ToSave_Tif)
        except:
            errorStr = "-ERROR Removing the tif.gz file: " + local_FullFilePath_ToSave_Tif
            print(errorStr)
            errorLog.append(errorStr)
            isError = True
        # Remove the temporary TFW.gz file (the .gz files)
        try:
            os.remove(local_FullFilePath_ToSave_Tfw)
        except:
            errorStr = "-ERROR Removing the tfw.gz file: " + local_FullFilePath_ToSave_Tfw
            print(errorStr)
            errorLog.append(errorStr)
            isError = True

        if isError:
            # try and remove the file??
            pass

        downloadCounter += 1

    #print "STOPPED RIGHT HERE!!! SHOULD ALREADY HAVE THE FILES... NOW NEED TO BETTER CATCH THE ERRORS AND REPORT THEM DOWN HERE... THATS ABOUT IT REALLY FOR THE DOWNLOADER....."

    # Pretty much done with the downloader for now..

    pass
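
A minimal usage sketch for the one-off 2015 downloader above (a hypothetical driver call; whether the ingest routine from the first example is run afterwards against the same folder is an assumption, since the two functions are configured with different datatype numbers):

# Download and extract the hard-coded 2015-03-07 .. 2015-12-31 range of IMERG rasters.
download_and_Extract_IMERG_2015_Dataset()
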
Example No. 3
def download_IMERG(startYYYYMMDD, endYYYYMMDD):
    # Parse Start and End date ranges from strings
    #startYear = startYYYYMMDD[0:4]
    #startMonth = startYYYYMMDD[4:6]
    #startDay = startYYYYMMDD[6:8]

    #endYear = endYYYYMMDD[0:4]
    #endMonth = endYYYYMMDD[4:6]
    #endDay = endYYYYMMDD[6:8]

    # Set the Datatype number
    current_DataTypeNumber = 26  # Hardcoded until there are more IMERG types in here..

    # Instance of Imerg Data Classes
    IMERG_DataClass = IDC.IMERG_Data()

    # Convert to dates
    dateFormat = "%Y%m%d"
    start_Date = datetime.datetime.strptime(startYYYYMMDD, dateFormat)
    end_Date = datetime.datetime.strptime(endYYYYMMDD, dateFormat)

    # Build expected string list
    #expected_FTP_FilePaths_TIF = []
    #expected_FTP_FilePaths_TFW = []
    expected_FTP_FilePaths = []  # each item holds "ftpPathTo_tif" and "ftpPathTo_tfw"
    # iterate through all dates
    delta = end_Date - start_Date
    for i in range(delta.days + 1):
        #print start_Date + datetime.timedelta(days=i)
        currentDate = start_Date + datetime.timedelta(days=i)
        print(currentDate)
        tifPath = IMERG_DataClass.get_Expected_FTP_FilePath_To_Tif(
            currentDate.year, currentDate.month, currentDate.day)
        tfwPath = IMERG_DataClass.get_Expected_FTP_FilePath_To_Tfw(
            currentDate.year, currentDate.month, currentDate.day)
        objToAdd = {"ftpPathTo_tif": tifPath, "ftpPathTo_tfw": tfwPath}
        if len(tifPath.strip()) > 0:
            expected_FTP_FilePaths.append(objToAdd)
        #expected_FTP_FilePaths_TIF.append(tifPath)
        #expected_FTP_FilePaths_TFW.append(tfwPath)

    # Folder Stuff
    # Create the destination folder if it does not exist
    dataDestinationFolder = params.dataTypes[current_DataTypeNumber][
        'inputDataLocation']
    print("-Data Destination Folder (Downloading To) : " +
          str(dataDestinationFolder))
    testFolderPath = os.path.dirname(dataDestinationFolder)
    if not os.path.exists(testFolderPath):
        os.makedirs(testFolderPath)
        print("-Created a new folder at path: " + str(testFolderPath))

    # Connect to the FTP Server and download all of the files in the list.
    ftp_Connection = None

    print("-Downloading files... this may take a few minutes....")
    downloadCounter = 0
    # Iterate through all of our expected file paths
    for ftpFullFilePaths in expected_FTP_FilePaths:

        isError = False
        errorLog = []
        if (downloadCounter % 10 == 0):
            print("-Downloaded: " + str(downloadCounter) + " rasters so far..")
        # Get the file names
        filenameOnly_Tif = ftpFullFilePaths['ftpPathTo_tif'].split('/')[-1]
        filenameOnly_Tfw = filenameOnly_Tif[:-2] + "fw"  # Remove part of the extension
        #ftpFullFilePaths['ftpPathTo_tfw'].split('/')[-1]

        # Make local filenames
        local_FullFilePath_ToSave_Tif = os.path.join(dataDestinationFolder, filenameOnly_Tif)
        local_FullFilePath_ToSave_Tfw = os.path.join(dataDestinationFolder, filenameOnly_Tfw)

        # Full download paths on the server (the TFW path is the TIF path with the extension swapped)
        ftp_PathTo_TIF = ftpFullFilePaths['ftpPathTo_tif']
        ftp_PathTo_TFW = ftpFullFilePaths['ftpPathTo_tif'][:-2] + "fw"

        # Download the Tif
        # Pre-create the destination file with open permissions, then fetch it.
        fx = open(local_FullFilePath_ToSave_Tif, "wb")
        fx.close()
        os.chmod(local_FullFilePath_ToSave_Tif, 0o777)
        print("creating file: " + local_FullFilePath_ToSave_Tif)
        print("download path: " + ftp_PathTo_TIF)
        try:
            urllib.urlretrieve(ftp_PathTo_TIF, local_FullFilePath_ToSave_Tif)
        except Exception as e:
            # Remove the empty/partial file so a failed download does not leave junk behind
            os.remove(local_FullFilePath_ToSave_Tif)
            print("removing the tif file: " + str(e))
            isError = True

        # Download the Tfw
        fx = open(local_FullFilePath_ToSave_Tfw, "wb")
        fx.close()
        os.chmod(local_FullFilePath_ToSave_Tfw, 0o777)
        try:
            urllib.urlretrieve(ftp_PathTo_TFW, local_FullFilePath_ToSave_Tfw)
        except Exception as e:
            os.remove(local_FullFilePath_ToSave_Tfw)
            print("removing the tfw file: " + str(e))
            isError = True

        if isError:
            # try and remove the file??
            pass

        downloadCounter += 1
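
A minimal usage sketch for the downloader above (a hypothetical call; the YYYYMMDD argument format is the one the function itself parses with datetime.strptime):

# Fetch one week of IMERG TIF/TFW rasters into the configured input folder.
download_IMERG("20160101", "20160107")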