def downloadFileFromBucket(self, bucketName, fileNameInBucket, filePathAtDestination, overwriteContent, makeDirectory, VERBOSE=True):
    '''
    Downloads a single file from a bucket to a local directory.

    params to pass:
    bucketName            : (string) name of bucket in which the file of interest is located
    fileNameInBucket      : (string) name of the file of interest in the bucket
    filePathAtDestination : (string) full path - including the filename itself - of the file once it has been downloaded
    overwriteContent      : (logical) if the specified target file already exists locally, do we overwrite?
    makeDirectory         : (logical) if the specified target file path includes new directories, should we make these?
    '''

    # check file exists in bucket
    if self.CheckFileExistsInBucket(bucketName, fileNameInBucket, VERBOSE=True) != True:
        raise RuntimeError, 'File "'+str(fileNameInBucket)+'" does not exist in bucket: will not download requested file !!!'

    # if we are not overwriting, check whether the file already exists locally: warn and abort the download if it does
    if overwriteContent == False:
        if os.path.exists(filePathAtDestination) == True:
            warnings.warn('File "'+str(filePathAtDestination)+'" already exists and overwriteContent==False: will not download requested file !!!')
            return

    # if we are overwriting, check that the existing file is not actually a directory
    if overwriteContent == True:
        if os.path.isdir(filePathAtDestination) == True:
            raise RuntimeError, 'A directory ("'+str(fileNameInBucket)+'") exists at path "'+str(filePathAtDestination)+'" with same name as file trying to download: EXITING!!'

    # if we are making the local directory in which to copy this file, use checkAndBuildPaths to ensure it exists
    if makeDirectory == True:
        # first remove the filename from the file path to leave just the path to the directory
        fpathTrunc = filePathAtDestination.rpartition('/')[0]
        checkAndBuildPaths(fpathTrunc, VERBOSE=True, BUILD=True)

    bucket = self.conn.get_bucket(bucketName)

    # establish key object
    filekey = bucket.get_key(fileNameInBucket)

    # pass the contents of the file on S3 to the local file
    filekey.get_contents_to_filename(filePathAtDestination)

    # finally, check that this file made it from S3 to the local destination

    ## first check there is even a file of this name at the local destination
    if os.path.exists(filePathAtDestination) != True:
        raise RuntimeError, 'Final check revealed file "'+str(filePathAtDestination)+'" did not copy successfully from S3 file "'+str(fileNameInBucket)+'" in bucket "'+str(bucketName)+'"'

    ## then check the md5 keys match
    md5_s3 = filekey.etag.strip('"')
    md5string = md5.new(file(filePathAtDestination).read()).hexdigest()
    if md5string != md5_s3:
        raise RuntimeError, 'Final check revealed file "'+str(filePathAtDestination)+'" did not copy successfully from S3 file "'+str(fileNameInBucket)+'" in bucket "'+str(bucketName)+'"'
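# Example usage - a minimal sketch assuming an initialised S3 object; the bucket
# and file names below are hypothetical, not taken from this codebase:
# S = S3(keyPath='/home/pwg/mbg-world/mbgw-scripts/s3code.txt')
# S.downloadFileFromBucket('examplebucket', 'example.hdf5', '/tmp/example.hdf5',
#                          overwriteContent=False, makeDirectory=True, VERBOSE=True)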
def isLOCALFILEIdenticalToS3FILE(self, bucketName, fileNameInBucket, localFilePath):
    """
    Checks whether a local file and a file on S3 are identical, according to their md5 strings.
    Does all the necessary checks and returns True/False accordingly.

    params to pass:
    bucketName       : (string) name of bucket that the file of interest is located in
    fileNameInBucket : (string) name of the file of interest in the bucket
    localFilePath    : (string) full path to the local file of interest
    """

    # check local file exists
    if checkAndBuildPaths(localFilePath, VERBOSE=True, BUILD=False) == -9999:
        return False

    # get md5 string for local file
    md5string = md5.new(file(localFilePath).read()).hexdigest()

    ## check this bucket exists
    if self.conn.lookup(bucketName) is None:
        print 'WARNING!!! requested bucket "' + str(bucketName) + '" does not exist on S3 !!!'
        return False

    ## check the file exists in this bucket
    if self.CheckFileExistsInBucket(bucketName, fileNameInBucket, VERBOSE=True) != True:
        return False

    ## get md5 string for this file in the bucket
    bucket = self.conn.get_bucket(bucketName)
    filekey = bucket.get_key(fileNameInBucket)
    md5_s3 = filekey.etag.strip('"')

    # compare this to the local md5 string: if they do not match, the files are not identical
    if md5string != md5_s3:
        return False

    # if all of these tests passed, then return True
    return True
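# Example usage - a minimal sketch; the bucket and file names are hypothetical:
# if not S.isLOCALFILEIdenticalToS3FILE('examplebucket', 'example.hdf5', '/tmp/example.hdf5'):
#     S.downloadFileFromBucket('examplebucket', 'example.hdf5', '/tmp/example.hdf5',
#                              overwriteContent=True, makeDirectory=True)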
print "FileEndRel: " + str(FileEndRel) print "totalN: " + str(totalN) print "startRel: " + str(startRel) print "endRel: " + str(endRel) print "BURDEN: " + str(BURDEN) print "PERPIXEL: " + str(PERPIXEL) print "PERCOUNTRY: " + str(PERCOUNTRY) if PERPIXEL == True: print '\n running PERPIXEL extraction' # check path for per-pixel exports exists print '\nchecking path for export exists..' checkAndBuildPaths(exportPathDistributed_perpixel, VERBOSE=True, BUILD=True) # now call extractSummaries_perpixel substituting in the formatted sys args print '\nrunning extractSummaries_perpixel..' extractSummaries_perpixel( [slice(None, None, None), slice(None, None, None), MonthsSlice], a_lo, a_hi, n_per, FileStartRel, FileEndRel, totalN, startRel, endRel, BURDEN) # now upload the output back to the S3 storage #S.uploadDirectoryAsBucket('distributedoutput_perpixel',exportPathDistributed_perpixel,uploadConstituentFiles=True,overwriteContent=True) ## loop through all files in local export storage for fname in os.listdir(exportPathDistributed_perpixel):
# import libraries
from map_utils import amazon_ec
from map_utils import S3
import numpy as np
from map_utils import checkAndBuildPaths
import time

# initialise amazon S3 key object
S = S3(keyPath='/home/pwg/mbg-world/mbgw-scripts/s3code.txt')

# set job distribution parameters
NINSTANCES = 19
MAXJOBSPERINSTANCE = 3
MAXJOBTRIES = 1  # maximum number of tries before we give up on any individual job
STDOUTPATH = '/home/pwg/mbg-world/stdout_extraction/DistributedOutputSTDOUTERR_'+str(PARAMFILE.partition('.')[0])+'_'+str(time.ctime())+'/'
checkAndBuildPaths(STDOUTPATH, VERBOSE=True, BUILD=True)

# set path to realisations on S3 and extract bucket and generic file name
relBucket = localparams.realisations_path.rsplit('/')[-2]
relPath = localparams.realisations_path.rsplit('/')[-1]

# call queryRealizationsInBucket to obtain number and start/end realisation numbers of these realisation files
relDict = S.queryRealizationsInBucket(relBucket, relPath, VERBOSE=True)
print '\nquerying bucket '+str(relBucket)+' : found '+str(relDict['Nrealisations'])+' realisations across '+str(relDict['Nfiles'])+' files.'

# set realization number parameters
NRELS = relDict['Nrealisations']
NJOBS = relDict['Nfiles']

####################################TEMP
# deal with system arguments (expects three)
RESERVATIONID = sys.argv[1]      ## defines ID of reservation that contains the instances we will use on EC2
PARAMFILE_PY = sys.argv[2]       ## defines name of python file housing the parameter definitions (e.g. extract_params_AF.py)
PARAMFILE_R = int(sys.argv[3])   ## defines name of R file housing additional parameter definitions for conditional simulation R scripts
#MAXJOBSPERINSTANCE = int(sys.argv[4])

# initialise amazon S3 key object
S = S3(keyPath='/home/pwg/mbg-world/mbgw-scripts/s3code.txt')

# set job distribution parameters
NINSTANCES = 19
MAXJOBSPERINSTANCE = 1
MAXJOBTRIES = 1  # maximum number of tries before we give up on any individual job
STDOUTPATH = '/home/pwg/mbg-world/stdout_CONDSIM/DistributedOutputSTDOUTERR_'+str(PARAMFILE_PY.partition('.')[0])+'_'+str(time.ctime())+'/'
checkAndBuildPaths(STDOUTPATH, VERBOSE=True, BUILD=True)

# set realization number parameters
n_total = 57  #100 #600
iter_per_job = 1
NJOBS = n_total / iter_per_job

#############TEMP
INTERIMINDEX = np.array([17,18,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55])
##################

# define files to upload to instance (from local machine) before any execution
from rpy import *
import numpy as np
import tables as tb
from extract_PYlib import examineSalb

# import some r functions
r.source('/home/pwg/map_utils/map_utils/GeneralUtility.R')
writeTableWithNamesPY = r['writeTableWithNames']

# set some parameters
salb1km_path = "/home/pwg/mbg-world/datafiles/auxiliary_data/GridsForCS/salb1km-e2_y-x+.hdf5"
grump1km_path = "/home/pwg/mbg-world/datafiles/auxiliary_data/GridsForCS/gr071km_y-x+.hdf5"
lims1km_path = "/home/pwg/mbg-world/datafiles/auxiliary_data/GridsForCS/lims1km-e_y-x+.hdf5"
outputTable_path = "/home/pwg/mbg-world/extraction/FixedPopulationExtraction.csv"

# check paths
from map_utils import checkAndBuildPaths
checkAndBuildPaths(salb1km_path, VERBOSE=True, BUILD=False)
checkAndBuildPaths(grump1km_path, VERBOSE=True, BUILD=False)
checkAndBuildPaths(lims1km_path, VERBOSE=True, BUILD=False)

# open link to salb grid, 3-level limits grid, and population grid
salb1km = tb.openFile(salb1km_path, mode="r")
grump1km = tb.openFile(grump1km_path, mode="r")
lims1km = tb.openFile(lims1km_path, mode="r")

# run check that input grids are the same shape
if (np.shape(salb1km.root.data) == np.shape(grump1km.root.data) == np.shape(lims1km.root.data)) == False:
    print "WARNING!! input grids are of uneven shape. salb1km="+str(np.shape(salb1km.root.data))+"; grump1km="+str(np.shape(grump1km.root.data))+"; lims1km="+str(np.shape(lims1km.root.data))

# run examineSalb to get list of unique salb IDs and corresponding pixel count
salbDict = examineSalb(salb1km)
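# For reference, a minimal sketch of the unique-ID / pixel-count summary that
# examineSalb is assumed to produce, judging by its use here (this is NOT the
# actual implementation, and the function name and dictionary keys are hypothetical):
def examineSalbSketch(salbGrid):
    '''Return unique salb IDs and per-ID pixel counts from an open hdf5 grid.'''
    # read the full grid into memory and flatten it (assumes the grid fits in memory)
    flat = np.asarray(salbGrid.root.data[:, :]).ravel()
    uniqueSalb = np.unique(flat)
    # count pixels belonging to each unique ID
    pixelN = np.array([(flat == u).sum() for u in uniqueSalb])
    return {'uniqueSalb': uniqueSalb, 'PixelN': pixelN}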
################################################################################# EXTRACT PER-COUNTRY MEAN PR,BURDEN,PAR

from extract_PYlib import *

# check filepaths stated in parameter file
from map_utils import checkAndBuildPaths
#checkAndBuildPaths(filename,VERBOSE=True,BUILD=True)
checkAndBuildPaths(exportPathDistributed_country, VERBOSE=True, BUILD=True)
checkAndBuildPaths(exportPathCombined_country, VERBOSE=True, BUILD=True)
checkAndBuildPaths(salblim1km_path, VERBOSE=True, BUILD=True)
checkAndBuildPaths(gr001km_path, VERBOSE=True, BUILD=True)
checkAndBuildPaths(uniqueSalb_path, VERBOSE=True, BUILD=True)
checkAndBuildPaths(pixelN_path, VERBOSE=True, BUILD=True)

#a=time.time()
#extractSummaries_country([slice(None,None,None), slice(None,None,None), slice(0,12,None)],2,10,int(sys.argv[1]),int(sys.argv[2]),int(sys.argv[3]))
#print "all done from PYlib"
#print("TOTAL TIME: "+(str(time.time()-a)))
#OR
extractSummaries_country([slice(None,None,None), slice(None,None,None), slice(0,12,None)], 2, 10, 1, 1, 2)

################################################################################# EXTRACT PER-PIXEL PR, CLASS, AND BURDEN SUMMARIES

################################################################################# COMBINE DISTRIBUTED COUNTRY AND PER PIXEL EXTRACTIONS

from extract_combineExtractions import *
for i in xrange(NJOBS):

    print 'Running extractions for realisation '+str(i)+' of '+str(NJOBS)

    # build filename of hdf5 realization file
    hdf5block_path = realisations_path
    hdf5block_path = hdf5block_path.replace('FILESTARTREL', str(FileStartRels[i]))
    hdf5block_path = hdf5block_path.replace('FILEENDREL', str(FileEndRels[i]))

    if PERPIXEL is True:

        print '\n running PERPIXEL extraction:'

        # check path for per-pixel exports exists
        print '\n\nchecking path for export exists..'
        checkAndBuildPaths(exportPathDistributed_perpixel, VERBOSE=True, BUILD=True)

        # now call extractSummaries_perpixel substituting in the formatted sys args
        print '\n\nrunning extractSummaries_perpixel..'
        extractSummaries_perpixel([slice(None,None,None), slice(None,None,None), MonthsSlice],
                                  a_lo, a_hi, NPER, FileStartRels[i], FileEndRels[i], NTOTALREL,
                                  None, None, do_PRMap, do_BurdenMap, do_RoMap)

    if PERCOUNTRY is True:

        print '\n running PERCOUNTRY extraction:'

        # check path for per-country exports exists
        print '\nchecking path for export exists..'
        checkAndBuildPaths(exportPathDistributed_country, VERBOSE=True, BUILD=True)

        # now call extractSummaries_country substituting in the formatted sys args
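# For reference, FileStartRels/FileEndRels (used in the loop above) are assumed
# to come from the parameter file and to partition the realisations evenly
# across the NJOBS files; a minimal sketch of one way they could be built
# (an assumption, not necessarily how the parameter file defines them):
#
# relsPerFile = NTOTALREL / NJOBS                    # integer division in Python 2
# FileStartRels = [j * relsPerFile for j in xrange(NJOBS)]
# FileEndRels = [start + relsPerFile - 1 for start in FileStartRels]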
import sys

S = S3(keyPath)  # initialise key object

# deal with system arguments
BURDEN = True
PERPIXEL = True
PERCOUNTRY = True
if sys.argv[1] == 'False': BURDEN = False
if sys.argv[2] == 'False': PERPIXEL = False
if sys.argv[3] == 'False': PERCOUNTRY = False

# make empty directory on instance to house realisation hdf5 file downloaded from S3
print '\n\tBuilding directory: '+realisations_path.rpartition('/')[0]
checkAndBuildPaths(realisations_path.rpartition('/')[0], VERBOSE=True, BUILD=True)

# optionally download the burden traces from S3 storage
if BURDEN == True:
    print '\nDownloading burden traces from S3..'
    S3bucketname = burdentrace_path.split('/')[-2]
    print '\tS3bucketname: '+str(S3bucketname)
    S3filename = burdentrace_path.split('/')[-1]
    print '\tS3filename: '+str(S3filename)
    S.downloadFileFromBucket(S3bucketname, S3filename, burdentrace_path,
                             overwriteContent=False, makeDirectory=True, VERBOSE=True)

if PERPIXEL == True:

    # make empty directory on instance to house output files ready to be uploaded back to S3
    print '\n\tBuilding directory: '+exportPathDistributed_perpixel
    checkAndBuildPaths(exportPathDistributed_perpixel, VERBOSE=True, BUILD=True)
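# The bucket-name / file-name split above recurs throughout these scripts; a
# small helper along these lines could wrap the pattern (hypothetical - not
# part of map_utils):
def splitS3Path(path):
    '''Return (bucketName, fileName) from a path whose last two components are bucket and file.'''
    return path.split('/')[-2], path.split('/')[-1]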
if (HiResLowResRatio_PERPIXEL == 1):
    salblim_path = salblim5km_path
    salb_path = salb5km_path
    grump_path = grump5km_path
    pixarea_path = pixarea5km_path
    limbnry_path = lim5kmbnry_path

if (HiResLowResRatio_PERPIXEL == 5):
    salblim_path = salblim1km_path
    salb_path = salb1km_path
    grump_path = grump1km_path
    pixarea_path = pixarea1km_path
    limbnry_path = lim1kmbnry_path

HiResLowResRatio = HiResLowResRatio_PERPIXEL

# build path for output to house combined per-pixel output maps
checkAndBuildPaths(exportPathCombined_perpixel, VERBOSE=True, BUILD=True)

checkAndBuildPaths(limbnry_path, VERBOSE=True, BUILD=False)
if (do_BurdenMap == True): checkAndBuildPaths(grump_path, VERBOSE=True, BUILD=False)

# now call combineDistribExtractions_perpixel
print '\n\tCalling combineDistribExtractions_perpixel'
combineDistribExtractions_perpixel()

# now upload the output back to the S3 storage

if PERCOUNTRY is True:

    # define paths to input files according to specified resolution
    if (HiResLowResRatio_PERCOUNTRY == 1):
def BuildAsciiParamsInHDF5(hdfFilePath, CELLTOLLERANCE=1e-6, missingDefault=-9999, overwrite=False):
    '''
    Takes an hdf5 grid file, which must have attributes: data, long, and lat.
    Will then work out the ascii header parameters for this grid and add them to _v_attrs.

    Input parameters:
    hdfFilePath    (string):       location including filename of hdf5 file
    CELLTOLLERANCE (float):        when inferring the cellsize by calculating the difference between stated cell positions in lat/long, how much disparity are we happy with?
    missingDefault (float or int): what value shall we use in the ascii header for missing data?
    overwrite      (Boolean):      if the hdf5 file already has a given piece of header info, shall we overwrite it with what we infer here?

    returns:
    nothing returned, but potentially changes hdf5 file in-situ
    '''

    # check hdf5 file exists, and exit if not
    if checkAndBuildPaths(hdfFilePath, VERBOSE=True, BUILD=False) == -9999:
        raise ValueError('hdf5 file does not exist')

    # initialize hdf5 file in append mode so we can add new attributes
    outHDF5 = tb.openFile(hdfFilePath, mode='a')
    lon = outHDF5.root.lon
    lat = outHDF5.root.lat

    # check in y-x+ format, or else not configured
    if outHDF5.root.data.attrs.view != 'y-x+':
        raise ValueError('hdf5 file: '+str(hdfFilePath)+'\nis not in y-x+ view, and function not configured to handle any other')

    # infer the cellsize, checking that the x and y cell sizes agree to within tolerance
    cellsizeX = lon[1] - lon[0]
    cellsizeY = lat[1] - lat[0]
    if abs(cellsizeX - cellsizeY) > CELLTOLLERANCE:
        print ('Inferred cell sizes from lat '+str(cellsizeY)+' and long '+str(cellsizeX)+' do not match')

    # infer the ascii header parameters in turn; for each, first check whether it
    # already exists on the file, and optionally overwrite
    inferredAttributes = (('ncols', len(lon)),
                          ('nrows', len(lat)),
                          ('minx', min(lon)),
                          ('maxx', max(lon)),
                          ('miny', min(lat)),
                          ('maxy', max(lat)),
                          ('cellsize', cellsizeX),
                          ('order', outHDF5.root.data.attrs.view),
                          ('missing', missingDefault))

    for attname, attvalue in inferredAttributes:
        if hasattr(outHDF5.root._v_attrs, attname):
            if getattr(outHDF5.root._v_attrs, attname) != attvalue:
                print ('existing '+attname+' value ('+str(getattr(outHDF5.root._v_attrs, attname))+') != that calculated here ('+str(attvalue)+')')
                if overwrite == False:
                    print ('hdf5 file at '+str(hdfFilePath)+' already has attribute '+attname+' and overwrite==False')
                else:
                    print ('hdf5 file at '+str(hdfFilePath)+' already has attribute '+attname+', but replacing because overwrite==True')
                    setattr(outHDF5.root._v_attrs, attname, attvalue)
        else:
            setattr(outHDF5.root._v_attrs, attname, attvalue)

    outHDF5.close()
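# Example usage - a minimal sketch; the file path below is hypothetical:
# BuildAsciiParamsInHDF5('/data/grids/example_y-x+.hdf5', overwrite=False)
# hdf = tb.openFile('/data/grids/example_y-x+.hdf5', mode='r')
# print hdf.root._v_attrs.ncols, hdf.root._v_attrs.cellsize
# hdf.close()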
def downloadBucketContents(self, bucketName, targetDirectoryPath, overwriteContent, VERBOSE=True):
    """
    Downloads the contents of an entire bucket to a specified local directory.

    params to pass:
    bucketName          : (string) name of bucket of interest
    targetDirectoryPath : (string) path to target directory. If this includes new directories, these will be built if possible
    overwriteContent    : (logical) if the specified path is to an existing directory, and there are existing files with the same name as those in the bucket, do we overwrite?
    """

    # check bucket exists on S3
    if self.conn.lookup(bucketName) is None:
        print 'WARNING!!! requested bucket "' + str(bucketName) + '" does not exist on S3 !!!'

    # check target local directory exists and if not then build it
    if checkAndBuildPaths(targetDirectoryPath, BUILD=True) == -9999:
        raise RuntimeError, 'Problem building target directory "' + str(targetDirectoryPath) + '" : EXITING!!!'

    # get list of files already in target directory
    existinglocalfiles = os.listdir(targetDirectoryPath)

    # ensure the directory path ends with a separator before appending filenames
    if targetDirectoryPath[-1] != '/':
        targetDirectoryPath = targetDirectoryPath + '/'

    # loop through all files in the bucket
    bucket = self.conn.get_bucket(bucketName)
    rs = bucket.list()
    for key in rs:

        # if not overwriting, check no file exists in local directory with same name as this file
        if overwriteContent == False:
            if existinglocalfiles.count(str(key.name)) > 0:
                if VERBOSE == True:
                    print 'WARNING!!! file "' + str(key.name) + '" already present in local directory "' + str(targetDirectoryPath) + '" and overwriteContent==False '
                continue

        # if we are overwriting, check that the existing file is not actually a directory
        if overwriteContent == True:
            if os.path.isdir(targetDirectoryPath + str(key.name)) == True:
                raise RuntimeError, 'A directory ("' + str(key.name) + '") exists at path "' + str(targetDirectoryPath) + '" with same name as file trying to download: EXITING!!'

        # build full target filepath
        filePathAtDestination = targetDirectoryPath + str(key.name)

        # now copy this file from the S3 bucket to the local directory
        key.get_contents_to_filename(filePathAtDestination)

        # check the file has made it to the destination

        ## first check there is even a file of this name at the local destination
        if os.path.exists(filePathAtDestination) != True:
            raise RuntimeError, 'Final check revealed file "' + str(filePathAtDestination) + '" did not copy successfully from S3 file "' + str(key.name) + '" in bucket "' + str(bucketName) + '"'

        ## then check the md5 keys match
        md5_s3 = key.etag.strip('"')
        md5string = md5.new(file(filePathAtDestination).read()).hexdigest()
        if md5string != md5_s3:
            raise RuntimeError, 'Final check revealed file "' + str(filePathAtDestination) + '" did not copy successfully from S3 file "' + str(key.name) + '" in bucket "' + str(bucketName) + '"'
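# Example usage - a minimal sketch; the target directory is hypothetical, though
# 'distributedoutput_perpixel' is a bucket name used elsewhere in these scripts:
# S = S3(keyPath='/home/pwg/mbg-world/mbgw-scripts/s3code.txt')
# S.downloadBucketContents('distributedoutput_perpixel', '/tmp/perpixel/',
#                          overwriteContent=False, VERBOSE=True)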
if PERPIXEL == True:

    # download from S3 contents of bucket 'distributedoutput_perpixel'; will automatically build the local directory if necessary
    print '\n\tDownloading contents of S3 bucket '+str(exportPathDistributed_perpixel.split('/')[2])+' to local directory '+exportPathDistributed_perpixel
    S.downloadBucketContents(exportPathDistributed_perpixel.split('/')[2], exportPathDistributed_perpixel,
                             overwriteContent=False, VERBOSE=True)

    # build path for output to house combined per-pixel output maps
    print '\n\tChecking path for '+exportPathCombined_perpixel
    checkAndBuildPaths(exportPathCombined_perpixel, VERBOSE=True, BUILD=True)

    # download from S3 the other necessary files (optionally need 5km grump for burden map)
    print '\n\tDownloading lim5kmbnry file from S3..'
    S3bucketname = lim5kmbnry_path.split('/')[-2]
    print '\t\tS3bucketname: '+str(S3bucketname)
    S3filename = lim5kmbnry_path.split('/')[-1]
    print '\t\tS3filename: '+str(S3filename)
    S.downloadFileFromBucket(S3bucketname, S3filename, lim5kmbnry_path,
                             overwriteContent=False, makeDirectory=True, VERBOSE=True)
    checkAndBuildPaths(lim5kmbnry_path, VERBOSE=True, BUILD=False)

    if BURDEN == True:
        print '\n\tDownloading grump5km file from S3..'
        S3bucketname = grump5km_path.split('/')[-2]
# script to download to an instance, before anything executes, any necessary auxiliary files, and to
# pre-build any necessary directories

print 'Starting: ECRUNSCRIPT_CONDSIM_PREDOWNLOAD..'

# import libraries
from map_utils import checkAndBuildPaths
from map_utils import S3
from CONDSIM_params import *
import sys

S = S3(keyPath)  # initialise key object

# make empty directory on instance to house realization hdf5 file that will be generated
print '\n\tBuilding directory: '+realizations_path.rpartition('/')[0]
checkAndBuildPaths(realizations_path.rpartition('/')[0], VERBOSE=True, BUILD=True)

# download from S3 the necessary auxiliary files..

## mcmc trace file
print '\nDownloading mcmc trace file from S3..'
S3bucketname = trace_path.split('/')[-2]
print '\tS3bucketname: '+str(S3bucketname)
S3filename = trace_path.split('/')[-1]
print '\tS3filename: '+str(S3filename)
S.downloadFileFromBucket(S3bucketname, S3filename, trace_path, overwriteContent=False, makeDirectory=True, VERBOSE=True)

## global 5km stable mask
print '\nDownloading 5km stable mask from S3..'
S3bucketname = lim5kmbnry_path.split('/')[-2]
print '\tS3bucketname: '+str(S3bucketname)