Example #1
    def downloadFileFromBucket(self, bucketName,fileNameInBucket,filePathAtDestination,overwriteContent,makeDirectory,VERBOSE=True):

        '''
        Downloads a single file from a bucket to a local directory
    
        params to pass:
        bucketName              : (string) name of bucket in which file of interest is located
        fileNameInBucket        : (string) name of file of interest in bucket
        filePathAtDestination   : (string) full path - including filename itself - of the file once it has been downloaded
        overwriteContent        : (logical) if the specified target file already exists locally, do we overwrite?
        makeDirectory           : (logical) if the specified target file path includes new directories, should we make these?
        '''

        # check file exists in bucket
        if (self.CheckFileExistsInBucket(bucketName,fileNameInBucket,VERBOSE=True) != True) : 
            raise RuntimeError, 'File "'+str(fileNameInBucket)+'" does not exist in bucket: will not download requested file !!!'

        # if we are not overwriting, then check if file already exists locally : warn and abort download if it does
        if overwriteContent==False:
            if (os.path.exists(filePathAtDestination)==True):
                warnings.warn ('File "'+str(filePathAtDestination)+'" already exists and overwriteContent==False: will not download requested file !!!' )
                return
            
        # if we are overwriting, check that existing file is not actually a directory
        if overwriteContent==True:
            if(os.path.isdir(filePathAtDestination)==True):
                raise RuntimeError, 'A directory ("'+str(fileNameInBucket)+'") exists at path "'+str(filePathAtDestination)+'" with same name as file trying to download: EXITING!!'

        # if we are making the local directory in which to copy this file, use checkAndBuildPaths to ensure it exists
        if makeDirectory==True:
            # first remove filename from file path to leave just path to directory
            fpathTrunc = filePathAtDestination.rpartition('/')[0]
            checkAndBuildPaths (fpathTrunc,VERBOSE=True,BUILD=True)

        bucket = self.conn.get_bucket(bucketName) 

        # establish key object
        filekey=bucket.get_key(fileNameInBucket)

        # pass the contents of file on s3 to the local file
        filekey.get_contents_to_filename(filePathAtDestination)

        # finally, check that this file made it from S3 to local destination

        ## first check there is even a file of this name at local destination
        if(os.path.exists(filePathAtDestination)!= True):
            raise RuntimeError, 'Final check revealed file "'+str(filePathAtDestination)+'" did not copy successfully from S3 file "'+str(fileNameInBucket)+'" in bucket "'+str(bucketName)+'"'
            

        ## then check the md5 keys match
        md5_s3 = filekey.etag.strip(str('"'))
        md5string = md5.new(file(filePathAtDestination).read()).hexdigest()

        if(md5string != md5_s3):
            raise RuntimeError, 'Final check revealed file "'+str(filePathAtDestination)+'" did not copy successfully from S3 file "'+str(fileNameInBucket)+'" in bucket "'+str(bucketName)+'"'
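
# A minimal usage sketch for downloadFileFromBucket, assuming the S3 wrapper class
# defined above is importable from map_utils (as in the later examples). The bucket
# and file names here are illustrative placeholders, not values from this project;
# the keyPath is the one used elsewhere in these scripts.
from map_utils import S3
S = S3(keyPath='/home/pwg/mbg-world/mbgw-scripts/s3code.txt')
S.downloadFileFromBucket('examplebucket',
                         'example_realisation.hdf5',
                         '/tmp/example_realisation.hdf5',
                         overwriteContent=True,
                         makeDirectory=True,
                         VERBOSE=True)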
Example #2
    def isLOCALFILEIdenticalToS3FILE(self, bucketName, fileNameInBucket, localFilePath):

        """
        Checks whether a local file and a file on S3 are identical, according to their md5 strings. Does all the necessary checks
        and returns a True/False accordingly
    
        params to pass:
        bucketName              : (string) name of bucket that file of interest is located in 
        fileNameInBucket        : (string) name of file of interest in the bucket
        localFilePath           : (string) full path to local file of interest
        """

        # check local file exists
        if checkAndBuildPaths(localFilePath, VERBOSE=True, BUILD=False) == -9999:
            return False

        # get md5 string for local file
        md5string = md5.new(file(localFilePath).read()).hexdigest()

        ## check this bucket exists
        if self.conn.lookup(bucketName) is None:
            print 'WARNING!!! requested bucket "' + str(bucketName) + '" does not exist on S3 !!!'
            return False

        ## check the file exists on this bucket
        if self.CheckFileExistsInBucket(bucketName, fileNameInBucket, VERBOSE=True) != True:
            return False

        ## get md5 string for this file in the bucket
        bucket = self.conn.get_bucket(bucketName)
        filekey = bucket.get_key(fileNameInBucket)
        md5_s3 = filekey.etag.strip(str('"'))

        # compare this to the passed md5 string : warn and return False if they differ
        if md5string != md5_s3:
            print 'WARNING!!! The md5 string of file "' + str(fileNameInBucket) + '" in bucket "' + str(bucketName) + '" does not match that of local file at "' + str(localFilePath) + '"  !!!!'
            return False

        # if these tests passed, then return True
        return True
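
# Usage sketch for isLOCALFILEIdenticalToS3FILE (the bucket, file and local path
# below are illustrative placeholders; S is an initialised S3 key object as in the
# other examples):
# identical = S.isLOCALFILEIdenticalToS3FILE('examplebucket', 'example_file.hdf5', '/tmp/example_file.hdf5')

# The md5 comparison above uses the deprecated Python 2 `md5` module and the
# `file()` builtin. Below is a sketch of the same digest computed with `hashlib`
# via a hypothetical helper, streaming the file so large grids need not fit in
# memory. Note that an S3 ETag only equals the MD5 digest for objects uploaded in
# a single part, so the comparison is a heuristic rather than a guarantee.
import hashlib

def localMD5(path, chunkSize=1 << 20):
    # read the file in 1MB chunks and accumulate the md5 digest
    h = hashlib.md5()
    f = open(path, 'rb')
    try:
        chunk = f.read(chunkSize)
        while chunk:
            h.update(chunk)
            chunk = f.read(chunkSize)
    finally:
        f.close()
    return h.hexdigest()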
print "FileEndRel: " + str(FileEndRel)
print "totalN: " + str(totalN)
print "startRel: " + str(startRel)
print "endRel: " + str(endRel)
print "BURDEN: " + str(BURDEN)
print "PERPIXEL: " + str(PERPIXEL)
print "PERCOUNTRY: " + str(PERCOUNTRY)

if PERPIXEL == True:

    print '\n running PERPIXEL extraction'

    # check path for per-pixel exports exists
    print '\nchecking path for export exists..'
    checkAndBuildPaths(exportPathDistributed_perpixel,
                       VERBOSE=True,
                       BUILD=True)

    # now call extractSummaries_perpixel substituting in the formatted sys args
    print '\nrunning extractSummaries_perpixel..'
    extractSummaries_perpixel(
        [slice(None, None, None),
         slice(None, None, None), MonthsSlice], a_lo, a_hi, n_per,
        FileStartRel, FileEndRel, totalN, startRel, endRel, BURDEN)

    # now upload the output back to the S3 storage
    #S.uploadDirectoryAsBucket('distributedoutput_perpixel',exportPathDistributed_perpixel,uploadConstituentFiles=True,overwriteContent=True)

    ## loop through all files in local export storage
    for fname in os.listdir(exportPathDistributed_perpixel):
# import libraries
from map_utils import amazon_ec
from map_utils import S3
import numpy as np
from map_utils import checkAndBuildPaths
import time

# initialise amazon S3 key object 
S=S3(keyPath='/home/pwg/mbg-world/mbgw-scripts/s3code.txt')

# set job distribution parameters
NINSTANCES = 19
MAXJOBSPERINSTANCE = 3
MAXJOBTRIES = 1 #maximum number of tries before we give up on any individual job
STDOUTPATH = '/home/pwg/mbg-world/stdout_extraction/DistributedOutputSTDOUTERR_'+str(PARAMFILE.partition('.')[0])+'_'+str(time.ctime())+'/'
checkAndBuildPaths(STDOUTPATH,VERBOSE=True,BUILD=True)

# set path to realisations on S3 and extract bucket and generic file name
relBucket = localparams.realisations_path.rsplit('/')[-2]
relPath = localparams.realisations_path.rsplit('/')[-1]

# call queryRealizationsInBucket to obtain number and start/end realisation numbers of these realisation files
relDict = S.queryRealizationsInBucket(relBucket,relPath,VERBOSE=True)

print '\nquerying bucket '+str(relBucket)+' : found '+str(relDict['Nrealisations'])+' realisations across '+str(relDict['Nfiles'])+' files.'

# set realization number parameters
NRELS = relDict['Nrealisations']
NJOBS = relDict['Nfiles']

####################################TEMP
Example #5
# deal with system arguments (expects three)
RESERVATIONID = sys.argv[1]  ## defines ID of reservation that contains the instances we will use on EC2
PARAMFILE_PY = sys.argv[2]  ## defines name of python file housing the parameter definitions (e.g. extract_params_AF.py)
PARAMFILE_R = int(sys.argv[3])  ## defines name of R file housing additional parameter definitions for conditional simulation R scripts
#MAXJOBSPERINSTANCE = int(sys.argv[4]) 

# initialise amazon S3 key object 
S=S3(keyPath='/home/pwg/mbg-world/mbgw-scripts/s3code.txt')

# set job distribution parameters
NINSTANCES = 19
MAXJOBSPERINSTANCE = 1
MAXJOBTRIES = 1 #maximum number of tries before we give up on any individual job
STDOUTPATH = '/home/pwg/mbg-world/stdout_CONDSIM/DistributedOutputSTDOUTERR_'+str(PARAMFILE_PY.partition('.')[0])+'_'+str(time.ctime())+'/'
checkAndBuildPaths(STDOUTPATH,VERBOSE=True,BUILD=True)

# set realization number parameters
n_total = 57#100 #600
iter_per_job = 1
NJOBS = n_total / iter_per_job


#############TEMP
INTERIMINDEX=np.array([17,18,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55])
##################




# define files to upload to instance (from local machine) before any execution
from rpy import *
from extract_PYlib import examineSalb

# import some r functions
r.source('/home/pwg/map_utils/map_utils/GeneralUtility.R')
writeTableWithNamesPY = r['writeTableWithNames']

# set some parameters
salb1km_path = "/home/pwg/mbg-world/datafiles/auxiliary_data/GridsForCS/salb1km-e2_y-x+.hdf5"
grump1km_path= "/home/pwg/mbg-world/datafiles/auxiliary_data/GridsForCS/gr071km_y-x+.hdf5"
lims1km_path= "/home/pwg/mbg-world/datafiles/auxiliary_data/GridsForCS/lims1km-e_y-x+.hdf5"
outputTable_path = "/home/pwg/mbg-world/extraction/FixedPopulationExtraction.csv"

# check paths
from map_utils import checkAndBuildPaths
checkAndBuildPaths(salb1km_path,VERBOSE=True,BUILD=False)
checkAndBuildPaths(grump1km_path,VERBOSE=True,BUILD=False)
checkAndBuildPaths(lims1km_path,VERBOSE=True,BUILD=False)


# open link to salb grid, 3-level limits grid, and population grid
salb1km = tb.openFile(salb1km_path, mode = "r")    
grump1km = tb.openFile(grump1km_path, mode = "r")    
lims1km = tb.openFile(lims1km_path, mode = "r") 

# run check that input grids are the same shape
if(((np.shape(salb1km.root.data)==np.shape(grump1km.root.data)==np.shape(lims1km.root.data))==False)):
    print "WARNING!! input grids are of uneven shape. salb1km="+str(np.shape(salb1km.root.data))+"; grump1km="+str(np.shape(grump1km.root.data))+"; lims1km="+str(np.shape(lims1km.root.data))

# run extract salb to get list of unique salb IDs and corresponding pixel count
salbDict = examineSalb (salb1km)
from extract_PYlib import examineSalb

# import some r functions
r.source('/home/pwg/map_utils/map_utils/GeneralUtility.R')
writeTableWithNamesPY = r['writeTableWithNames']

# set some parameters
salb1km_path = "/home/pwg/mbg-world/datafiles/auxiliary_data/GridsForCS/salb1km-e2_y-x+.hdf5"
grump1km_path = "/home/pwg/mbg-world/datafiles/auxiliary_data/GridsForCS/gr071km_y-x+.hdf5"
lims1km_path = "/home/pwg/mbg-world/datafiles/auxiliary_data/GridsForCS/lims1km-e_y-x+.hdf5"
outputTable_path = "/home/pwg/mbg-world/extraction/FixedPopulationExtraction.csv"

# check paths
from map_utils import checkAndBuildPaths

checkAndBuildPaths(salb1km_path, VERBOSE=True, BUILD=False)
checkAndBuildPaths(grump1km_path, VERBOSE=True, BUILD=False)
checkAndBuildPaths(lims1km_path, VERBOSE=True, BUILD=False)

# open link to salb grid, 3-level limits grid, and population grid
salb1km = tb.openFile(salb1km_path, mode="r")
grump1km = tb.openFile(grump1km_path, mode="r")
lims1km = tb.openFile(lims1km_path, mode="r")

# run check that input grids are the same shape
if (((np.shape(salb1km.root.data) == np.shape(grump1km.root.data) == np.shape(
        lims1km.root.data)) == False)):
    print "WARNING!! input grids are of uneven shape. salb1km=" + str(
        np.shape(salb1km.root.data)) + "; grump1km=" + str(
            np.shape(grump1km.root.data)) + "; lims1km=" + str(
                np.shape(lims1km.root.data))
#################################################################################
# EXTRACT PER-COUNTRY MEAN PR,BURDEN,PAR

from extract_PYlib import *

# check filepaths stated in parameter file
from map_utils import checkAndBuildPaths
#checkAndBuildPaths(filename,VERBOSE=True,BUILD=True)
checkAndBuildPaths(exportPathDistributed_country,VERBOSE=True,BUILD=True)
checkAndBuildPaths(exportPathCombined_country,VERBOSE=True,BUILD=True)
checkAndBuildPaths(salblim1km_path,VERBOSE=True,BUILD=True)
checkAndBuildPaths(gr001km_path,VERBOSE=True,BUILD=True)
checkAndBuildPaths(uniqueSalb_path,VERBOSE=True,BUILD=True)
checkAndBuildPaths(pixelN_path,VERBOSE=True,BUILD=True)

#a=time.time()
#extractSummaries_country([slice(None,None,None), slice(None,None,None), slice(0,12,None)],2,10,int(sys.argv[1]),int(sys.argv[2]),int(sys.argv[3]))
#print "all done from PYlib"
#print("TOTAL TIME: "+(str(time.time()-a)))
#OR

extractSummaries_country([slice(None,None,None), slice(None,None,None), slice(0,12,None)],2,10,1,1,2)


#################################################################################
# EXTRACT PER-PIXEL PR, CLASS, AND BURDEN SUMMARIES
#################################################################################
# COMBINE DISTRIBUTED COUNTRY AND PER PIXEL EXTRACTIONS

from extract_combineExtractions import *
for i in xrange(NJOBS):

    print 'Running extractions for realisation '+str(i)+' of '+str(NJOBS)

    # build filename of hdf5 realization file
    hdf5block_path = realisations_path
    hdf5block_path = hdf5block_path.replace('FILESTARTREL',str(FileStartRels[i]))
    hdf5block_path = hdf5block_path.replace('FILEENDREL',str(FileEndRels[i]))
    
    if PERPIXEL is True:

        print '\n running PERPIXEL extraction:'

        # check path for per-pixel exports exists
        print '\n\nchecking path for export exists..'
        checkAndBuildPaths(exportPathDistributed_perpixel,VERBOSE=True,BUILD=True)

        # now call extractSummaries_perpixel substituting in the formatted sys args 
        print '\n\nrunning extractSummaries_perpixel..'
        extractSummaries_perpixel ([slice(None,None,None), slice(None,None,None), MonthsSlice],a_lo,a_hi,NPER,FileStartRels[i],FileEndRels[i],NTOTALREL,None,None,do_PRMap,do_BurdenMap,do_RoMap)


    if PERCOUNTRY is True:

        print '\n running PERCOUNTRY extraction:'

        # check path for per-country exports exists
        print '\nchecking path for export exists..'
        checkAndBuildPaths(exportPathDistributed_country,VERBOSE=True,BUILD=True)

        # now call extractSummaries_country substituting in the formatted sys args 
import sys

S=S3(keyPath) # initialise key object

# deal with system arguments
BURDEN = True
PERPIXEL = True
PERCOUNTRY = True

if sys.argv[1] == 'False' : BURDEN=False
if sys.argv[2] == 'False' : PERPIXEL=False
if sys.argv[3] == 'False' : PERCOUNTRY=False

# make empty directory on instance to house realisation hdf5 file downloaded from S3
print '\n\tBuilding directory: '+realisations_path.rpartition('/')[0]
checkAndBuildPaths(realisations_path.rpartition('/')[0],VERBOSE=True,BUILD=True)


# optionally download the burden traces from S3 storage
if (BURDEN==True):
    print '\nDownloading burden traces from S3..'
    S3bucketname = burdentrace_path.split('/')[-2]
    print '\tS3bucketname: '+str(S3bucketname)
    S3filename = burdentrace_path.split('/')[-1]
    print '\tS3filename: '+str(S3filename)
    S.downloadFileFromBucket(S3bucketname,S3filename,burdentrace_path,overwriteContent=False,makeDirectory=True,VERBOSE=True)

if (PERPIXEL==True):
    # make empty directory on instance to house output files ready to be uploaded back to S3
    print '\n\tBuilding directory: '+exportPathDistributed_perpixel
    checkAndBuildPaths(exportPathDistributed_perpixel,VERBOSE=True,BUILD=True)    
    if (HiResLowResRatio_PERPIXEL==1):
        salblim_path=salblim5km_path
        salb_path=salb5km_path
        grump_path=grump5km_path
        pixarea_path=pixarea5km_path
        limbnry_path=lim5kmbnry_path
    if (HiResLowResRatio_PERPIXEL==5):
        salblim_path=salblim1km_path
        salb_path=salb1km_path
        grump_path=grump1km_path
        pixarea_path=pixarea1km_path
        limbnry_path=lim1kmbnry_path
    HiResLowResRatio=HiResLowResRatio_PERPIXEL

    # build path for output to house combined per-pixel output maps
    checkAndBuildPaths(exportPathCombined_perpixel,VERBOSE=True,BUILD=True)

    checkAndBuildPaths(limbnry_path,VERBOSE=True,BUILD=False)

    if (do_BurdenMap==True): checkAndBuildPaths(grump_path,VERBOSE=True,BUILD=False)

    # now call extractSummaries_perpixel substituting in the formatted sys args 
    print '\n\tCalling combineDistribExtractions_perpixel'
    combineDistribExtractions_perpixel()

    # now upload the output back to the S3 storage

if PERCOUNTRY is True:

    # define paths to input files according to specified resolution
    if (HiResLowResRatio_PERCOUNTRY==1):
    if (HiResLowResRatio_PERPIXEL == 1):
        salblim_path = salblim5km_path
        salb_path = salb5km_path
        grump_path = grump5km_path
        pixarea_path = pixarea5km_path
        limbnry_path = lim5kmbnry_path
    if (HiResLowResRatio_PERPIXEL == 5):
        salblim_path = salblim1km_path
        salb_path = salb1km_path
        grump_path = grump1km_path
        pixarea_path = pixarea1km_path
        limbnry_path = lim1kmbnry_path
    HiResLowResRatio = HiResLowResRatio_PERPIXEL

    # build path for output to house combined per-pixel output maps
    checkAndBuildPaths(exportPathCombined_perpixel, VERBOSE=True, BUILD=True)

    checkAndBuildPaths(limbnry_path, VERBOSE=True, BUILD=False)

    if (do_BurdenMap == True):
        checkAndBuildPaths(grump_path, VERBOSE=True, BUILD=False)

    # now call extractSummaries_perpixel substituting in the formatted sys args
    print '\n\tCalling combineDistribExtractions_perpixel'
    combineDistribExtractions_perpixel()

    # now upload the output back to the S3 storage

if PERCOUNTRY is True:

    # define paths to input files according to specified resolution
#print type(PERPIXEL)

if (PERPIXEL == True):

    # download from S3 contents of bucket 'distributedoutput_perpixel', will automatically build the local directory if necessary
    print '\n\tDownloading contents of S3 bucket ' + str(
        exportPathDistributed_perpixel.split('/')
        [2]) + ' to local directory ' + exportPathDistributed_perpixel
    S.downloadBucketContents(exportPathDistributed_perpixel.split('/')[2],
                             exportPathDistributed_perpixel,
                             overwriteContent=False,
                             VERBOSE=True)

    # build path for output to house combined per-pixel output maps
    print '\n\tChecking path for ' + exportPathCombined_perpixel
    checkAndBuildPaths(exportPathCombined_perpixel, VERBOSE=True, BUILD=True)

    # download from S3 the other necessary files (optionally need 5km grump for burden map)
    print '\n\tDownloading lim5kmbnry file from S3..'
    S3bucketname = lim5kmbnry_path.split('/')[-2]
    print '\t\tS3bucketname: ' + str(S3bucketname)
    S3filename = lim5kmbnry_path.split('/')[-1]
    print '\t\tS3filename: ' + str(S3filename)
    S.downloadFileFromBucket(S3bucketname,
                             S3filename,
                             lim5kmbnry_path,
                             overwriteContent=False,
                             makeDirectory=True,
                             VERBOSE=True)
    checkAndBuildPaths(lim5kmbnry_path, VERBOSE=True, BUILD=False)
Example #14
S = S3(keyPath)  # initialise key object

# deal with system arguments
BURDEN = True
PERPIXEL = True
PERCOUNTRY = True

if sys.argv[1] == 'False': BURDEN = False
if sys.argv[2] == 'False': PERPIXEL = False
if sys.argv[3] == 'False': PERCOUNTRY = False

# make empty directory on instance to house realisation hdf5 file downloaded from S3
print '\n\tBuilding directory: ' + realisations_path.rpartition('/')[0]
checkAndBuildPaths(realisations_path.rpartition('/')[0],
                   VERBOSE=True,
                   BUILD=True)

# optionally download the burden traces from S3 storage
if (BURDEN == True):
    print '\nDownloading burden traces from S3..'
    S3bucketname = burdentrace_path.split('/')[-2]
    print '\tS3bucketname: ' + str(S3bucketname)
    S3filename = burdentrace_path.split('/')[-1]
    print '\tS3filename: ' + str(S3filename)
    S.downloadFileFromBucket(S3bucketname,
                             S3filename,
                             burdentrace_path,
                             overwriteContent=False,
                             makeDirectory=True,
                             VERBOSE=True)
def BuildAsciiParamsInHDF5(hdfFilePath,CELLTOLLERANCE = 1e-6,missingDefault=-9999,overwrite=False):

    ''' Takes an hdf5 grid file, which must have attributes: data, long, and lat.
        Will then work out the ascii header parameters for this grid and add them to _v_attrs
        
        Input parameters;
        hdfFilePath (string): location including filename of hdf5 file
        CELLTOLLERANCE (float): when inferring the cellsize by calculating difference between stated cell
                                positions in lat/long, how much disparity are we happy with?
        missingDefault (float or int): what value shall we use in the ascii header for missing
        overwrite (Boolean): if the hdf5 file already has a given piece of header info, shall we overwrite it with what we infer here?
        
        returns:
        nothing returned, but potentially changes hdf5 file in-situ
    ''' 

    # check hdf5 file exists, and exit if not
    if(checkAndBuildPaths(hdfFilePath,VERBOSE=True,BUILD=False)==-9999):
        raise ValueError ('hdf5 file does not exist')    
    
    # Initialize hdf5 file in append mode so can add new attributes
    outHDF5 = tb.openFile(hdfFilePath, mode='a')
    lon =  outHDF5.root.lon
    lat =  outHDF5.root.lat  
    
    # check in y-x+ format, or else not configured
    if outHDF5.root.data.attrs.view != 'y-x+':
        raise ValueError ('hdf5 file: '+str(hdfFilePath)+'\nis not in y-x+ view, and function not configured to handle any other')    

    # infer ascii parameters in turn, for each first checking if it already exists, and optionally overwriting
    ncols = len(lon)
    if hasattr(outHDF5.root._v_attrs,'ncols'):
        if(outHDF5.root._v_attrs.ncols !=ncols):
            print ('existing ncols value ('+str(outHDF5.root._v_attrs.ncols)+' != that calculated here '+str(ncols))

        if overwrite==False:
            print ('hdf5 file at'+str(hdfFilePath)+',already has attribute ncols and overwrite==FALSE')
        else:
            print ('hdf5 file at'+str(hdfFilePath)+',already has attribute ncols, but replacing because overwrite==True')
            outHDF5.root._v_attrs.ncols = ncols
    else:
        outHDF5.root._v_attrs.ncols = ncols     

    nrows = len(lat)
    if hasattr(outHDF5.root._v_attrs,'nrows'):
        if(outHDF5.root._v_attrs.nrows !=nrows):
            print ('existing nrows value ('+str(outHDF5.root._v_attrs.nrows)+' != that calculated here '+str(nrows))

        if overwrite==False:
            print ('hdf5 file at'+str(hdfFilePath)+',already has attribute nrows and overwrite==FALSE')
        else:
            print ('hdf5 file at'+str(hdfFilePath)+',already has attribute nrows, but replacing because overwrite==True')
            outHDF5.root._v_attrs.nrows = nrows
    else:
        outHDF5.root._v_attrs.nrows = nrows  


    minx = min(lon)
    if hasattr(outHDF5.root._v_attrs,'minx'):
        if(outHDF5.root._v_attrs.minx !=minx):
            print ('existing minx value ('+str(outHDF5.root._v_attrs.minx)+' != that calculated here '+str(minx))

        if overwrite==False:
            print ('hdf5 file at'+str(hdfFilePath)+',already has attribute minx and overwrite==FALSE')
        else:
            print ('hdf5 file at'+str(hdfFilePath)+',already has attribute minx, but replacing because overwrite==True')
            outHDF5.root._v_attrs.minx = minx
    else:
        outHDF5.root._v_attrs.minx = minx  

    maxx = max(lon)
    if hasattr(outHDF5.root._v_attrs,'maxx'):
        if(outHDF5.root._v_attrs.maxx !=maxx):
            print ('existing maxx value ('+str(outHDF5.root._v_attrs.maxx)+' != that calculated here '+str(maxx))

        if overwrite==False:
            print ('hdf5 file at'+str(hdfFilePath)+',already has attribute maxx and overwrite==FALSE')
        else:
            print ('hdf5 file at'+str(hdfFilePath)+',already has attribute maxx, but replacing because overwrite==True')
            outHDF5.root._v_attrs.maxx = maxx
    else:
        outHDF5.root._v_attrs.maxx = maxx

    miny = min(lat)
    if hasattr(outHDF5.root._v_attrs,'miny'):
        if(outHDF5.root._v_attrs.miny !=miny):
            print ('existing miny value ('+str(outHDF5.root._v_attrs.miny)+' != that calculated here '+str(miny))

        if overwrite==False:
            print ('hdf5 file at'+str(hdfFilePath)+',already has attribute miny and overwrite==FALSE')
        else:
            print ('hdf5 file at'+str(hdfFilePath)+',already has attribute miny, but replacing because overwrite==True')
            outHDF5.root._v_attrs.miny = miny
    else:
        outHDF5.root._v_attrs.miny = miny
        
        
    maxy = max(lat)
    if hasattr(outHDF5.root._v_attrs,'maxy'):
        if(outHDF5.root._v_attrs.maxy !=maxy):
            print ('existing maxy value ('+str(outHDF5.root._v_attrs.maxy)+' != that calculated here '+str(maxy))

        if overwrite==False:
            print ('hdf5 file at'+str(hdfFilePath)+',already has attribute maxy and overwrite==FALSE')
        else:
            print ('hdf5 file at'+str(hdfFilePath)+',already has attribute maxy, but replacing because overwrite==True')
            outHDF5.root._v_attrs.maxy = maxy
    else:
        outHDF5.root._v_attrs.maxy = maxy        
        
    cellsizeX = lon[1] - lon[0]
    cellsizeY = lat[1] - lat[0]    
    if(abs(cellsizeX - cellsizeY)>CELLTOLLERANCE):
        print ('Inferred cell sizes from lat '+str(cellsizeY)+' and long ' +str(cellsizeX)+' do not match')
        
    cellsize = cellsizeX
    if hasattr(outHDF5.root._v_attrs,'cellsize'):
        if(outHDF5.root._v_attrs.cellsize !=cellsize):
            print ('existing cellsize value ('+str(outHDF5.root._v_attrs.cellsize)+' != that calculated here '+str(cellsize))

        if overwrite==False:
            print ('hdf5 file at'+str(hdfFilePath)+',already has attribute cellsize and overwrite==FALSE')
        else:
            print ('hdf5 file at'+str(hdfFilePath)+',already has attribute cellsize, but replacing because overwrite==True')
            outHDF5.root._v_attrs.cellsize = cellsize
    else:
        outHDF5.root._v_attrs.cellsize = cellsize                
        

    order = outHDF5.root.data.attrs.view
    if hasattr(outHDF5.root._v_attrs,'order'):
        if(outHDF5.root._v_attrs.order !=order):
            print ('existing order value ('+str(outHDF5.root._v_attrs.order)+' != that calculated here '+str(order))

        if overwrite==False:
            print ('hdf5 file at'+str(hdfFilePath)+',already has attribute order and overwrite==FALSE')
        else:
            print ('hdf5 file at'+str(hdfFilePath)+',already has attribute order, but replacing because overwrite==True')
            outHDF5.root._v_attrs.order = order
    else:
        outHDF5.root._v_attrs.order = order            

    missing = missingDefault
    if hasattr(outHDF5.root._v_attrs,'missing'):
        if(outHDF5.root._v_attrs.missing !=missing):
            print ('existing missing value ('+str(outHDF5.root._v_attrs.missing)+' != that calculated here '+str(missing))

        if overwrite==False:
            print ('hdf5 file at'+str(hdfFilePath)+',already has attribute missing and overwrite==FALSE')
        else:
            print ('hdf5 file at'+str(hdfFilePath)+',already has attribute missing, but replacing because overwrite==True')
            outHDF5.root._v_attrs.missing = missing
    else:
        outHDF5.root._v_attrs.missing = missing    

    outHDF5.close()
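
# Usage sketch: infer and store the ascii header parameters for one of the 1km
# grids referenced in the earlier examples (path taken from those examples;
# overwrite is left at its default of False so any existing attributes are
# reported rather than replaced).
BuildAsciiParamsInHDF5('/home/pwg/mbg-world/datafiles/auxiliary_data/GridsForCS/salb1km-e2_y-x+.hdf5')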
Example #16
    def downloadBucketContents(self, bucketName, targetDirectoryPath, overwriteContent, VERBOSE=True):

        """
        Downloads the contents of an entire bucket to a specified local directory.
    
        params to pass:
        bucketName              : (string) name of bucket of interest  
        targetDirectoryPath     : (string) path to target directory. If this includes new directories, these will be built if possible
        overwriteContent        : (logical) if the specified path is to an existing directory, and there are existing files with the same name as those in the bucket, do we overwrite?
        """

        # check bucket exists on S3
        if self.conn.lookup(bucketName) is None:
            print 'WARNING!!! requested bucket "' + str(bucketName) + '" does not exist on S3 !!!'

        # check target local directory exists and if not then build it
        if checkAndBuildPaths(targetDirectoryPath, BUILD=True) == -9999:
            raise RuntimeError, 'Problem building target directory "' + str(targetDirectoryPath) + '" : EXITING!!!'

        # get list of files already in target directory
        existinglocalfiles = os.listdir(targetDirectoryPath)

        # loop through all files in the bucket
        bucket = self.conn.get_bucket(bucketName)
        rs = bucket.list()
        for key in rs:

            # if not overwriting, check no file exists in local directory with same name as this file
            if overwriteContent == False:
                if existinglocalfiles.count(str(key.name)) > 0:
                    if VERBOSE == True:
                        print 'WARNING!!! file "' + str(key.name) + '" already present in local directory "' + str(
                            targetDirectoryPath
                        ) + '" and overwriteContent==False '
                    continue

            # if we are overwriting, check that existing file is not actually a directory
            if overwriteContent == True:
                if os.path.isdir(targetDirectoryPath + str(key.name)) == True:
                    raise RuntimeError, 'A directory ("' + str(key.name) + '") exists at path "' + str(
                        targetDirectoryPath
                    ) + '" with same name as file trying to download: EXITING!!'

            # build full target filepath
            if targetDirectoryPath[-1] != "/":
                targetDirectoryPath = targetDirectoryPath + "/"
            filePathAtDestination = targetDirectoryPath + str(key.name)

            # now copy this file from S3 bucket to local directory
            key.get_contents_to_filename(filePathAtDestination)

            # check file has made it to destination

            ## first check there is even a file of this name at local destination
            if os.path.exists(filePathAtDestination) != True:
                raise RuntimeError, 'Final check revealed file "' + str(
                    filePathAtDestination
                ) + '" did not copy succesfully from S3 file "' + str(key.name) + '" in bucket "' + str(
                    bucketName
                ) + '"'

            ## then check the md5 keys match
            md5_s3 = key.etag.strip(str('"'))
            md5string = md5.new(file(filePathAtDestination).read()).hexdigest()

            if md5string != md5_s3:
                raise RuntimeError, 'Final check revealed file "' + str(
                    filePathAtDestination
                ) + '" did not copy succesfully from S3 file "' + str(key.name) + '" in bucket "' + str(
                    bucketName
                ) + '"'
Example #17
# pre-build any necessary directories

print 'Starting: ECRUNSCRIPT_CONDSIM_PREDOWNLOAD..'

# import libraries
from map_utils import checkAndBuildPaths
from map_utils import S3
from CONDSIM_params import *
import sys

S = S3(keyPath)  # initialise key object

# make empty directory on instance to house realization hdf5 file that will be generated
print '\n\tBuilding directory: ' + realizations_path.rpartition('/')[0]
checkAndBuildPaths(realizations_path.rpartition('/')[0],
                   VERBOSE=True,
                   BUILD=True)

# download from S3 the necessary auxiliary files..

## mcmc trace file
print '\nDownloading mcmc trace file from S3..'
S3bucketname = trace_path.split('/')[-2]
print '\tS3bucketname: ' + str(S3bucketname)
S3filename = trace_path.split('/')[-1]
print '\tS3filename: ' + str(S3filename)
S.downloadFileFromBucket(S3bucketname,
                         S3filename,
                         trace_path,
                         overwriteContent=False,
                         makeDirectory=True,
                         VERBOSE=True)
if PERPIXEL == True:

    # download from S3 contents of bucket 'distributedoutput_perpixel', will automatically build the local directory if necessary
    print "\n\tDownloading contents of S3 bucket " + str(
        exportPathDistributed_perpixel.split("/")[2]
    ) + " to local directory " + exportPathDistributed_perpixel
    S.downloadBucketContents(
        exportPathDistributed_perpixel.split("/")[2],
        exportPathDistributed_perpixel,
        overwriteContent=False,
        VERBOSE=True,
    )

    # build path for output to house combined per-pixel output maps
    print "\n\tChecking path for " + exportPathCombined_perpixel
    checkAndBuildPaths(exportPathCombined_perpixel, VERBOSE=True, BUILD=True)

    # download from S3 the other necessary files (optionally need 5km grump for burden map)
    print "\n\tDownloading lim5kmbnry file from S3.."
    S3bucketname = lim5kmbnry_path.split("/")[-2]
    print "\t\tS3bucketname: " + str(S3bucketname)
    S3filename = lim5kmbnry_path.split("/")[-1]
    print "\t\tS3filename: " + str(S3filename)
    S.downloadFileFromBucket(
        S3bucketname, S3filename, lim5kmbnry_path, overwriteContent=False, makeDirectory=True, VERBOSE=True
    )
    checkAndBuildPaths(lim5kmbnry_path, VERBOSE=True, BUILD=False)

    if BURDEN == True:
        print "\n\tDownloading grump5km file from S3.."
        S3bucketname = grump5km_path.split("/")[-2]
# script to download to an instance, before anything executes, any necessary auxiliary files, and to 
# pre-build any necessary directories

print 'Starting: ECRUNSCRIPT_CONDSIM_PREDOWNLOAD..'

# import libraries
from map_utils import checkAndBuildPaths
from map_utils import S3
from CONDSIM_params import *
import sys

S=S3(keyPath) # initialise key object

# make empty directory on instance to house realization hdf5 file that will be generated
print '\n\tBuilding directory: '+realizations_path.rpartition('/')[0]
checkAndBuildPaths(realizations_path.rpartition('/')[0],VERBOSE=True,BUILD=True)

# download from S3 the necessary auxiliary files..

## mcmc trace file
print '\nDownloading mcmc trace file from S3..'
S3bucketname = trace_path.split('/')[-2]
print '\tS3bucketname: '+str(S3bucketname)
S3filename = trace_path.split('/')[-1]
print '\tS3filename: '+str(S3filename)
S.downloadFileFromBucket(S3bucketname,S3filename,trace_path,overwriteContent=False,makeDirectory=True,VERBOSE=True)

## global 5km stable mask
print '\nDownloading 5km stable mask from S3..'
S3bucketname = lim5kmbnry_path.split('/')[-2]
print '\tS3bucketname: '+str(S3bucketname)