def Run_Framework():
    """Parse command-line arguments and dispatch the requested model run.

    Command line:
        -c/--Config    full path to the configuration file
        -m/--ModelRun  one of: UpdateConfig, Spinup, DefaultHindcast,
                       Forecast, AcceptAndCopy
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--Config', help='Full path to the configuration file.')
    parser.add_argument('-m', '--ModelRun',
                        help='Type of model run: Spinup,DefaultHindcast,HindcastAdjust,Forecast')
    # argparse already returns a namespace object; the previous throwaway
    # `class args(object): pass` instance was unnecessary
    data = parser.parse_args()

    # read configuration file (ConfigParse is a class that stores all parameters)
    config_file = FrameworkLibrary.ConfigParse(data.Config)
    model_run = data.ModelRun

    # set initial working directory to repository root folder
    os.chdir(config_file.repository_directory)

    # ===== run operational framework =====
    if model_run == "UpdateConfig":
        # update the configuration file (hindcast and forecast dates) with today's dates
        print("\n===============Updating Configuration File with Today's dates===================\n")
        FrameworkLibrary.UpdateConfig(config_file)
    elif model_run == "Spinup":
        FrameworkLibrary.spin_up(config_file)
    elif model_run == "DefaultHindcast":
        FrameworkLibrary.hindcast(config_file)
    elif model_run == "Forecast":
        FrameworkLibrary.forecast(config_file)
    elif model_run == "AcceptAndCopy":
        FrameworkLibrary.AcceptAndCopy(config_file)
    else:
        print("\noptions selected are not correct. please review configuration settings.\n")
def Run_Framework():
    """Entry point: read CLI options, load the configuration, and run the
    selected stage of the operational framework."""
    # collect arguments into a plain namespace object
    class args(object):
        pass
    data = args()
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--Config", help="Full path to the configuration file.")
    parser.add_argument("-m", "--ModelRun",
                        help="Type of model run: Spinup,DefaultHindcast,HindcastAdjust,Forecast")
    parser.parse_args(namespace=data)

    # parse the configuration file; the returned object carries every parameter
    config_file = FrameworkLibrary.ConfigParse(data.Config)
    model_run = data.ModelRun

    # work from the repository root from here on
    os.chdir(config_file.repository_directory)

    # dispatch on the requested run type, returning as soon as one matches
    if model_run == "UpdateConfig":
        # refresh the hindcast/forecast dates in the configuration file
        print("\n===============Updating Configuration File with Today's dates===================\n")
        FrameworkLibrary.UpdateConfig(config_file)
        return
    if model_run == "Spinup":
        FrameworkLibrary.spin_up(config_file)
        return
    if model_run == "DefaultHindcast":
        FrameworkLibrary.hindcast(config_file)
        return
    if model_run == "Forecast":
        FrameworkLibrary.forecast(config_file)
        return
    if model_run == "AcceptAndCopy":
        FrameworkLibrary.AcceptAndCopy(config_file)
        return
    print("\noptions selected are not correct. please review configuration settings.\n")
def Download_Datamart_GEMHindcast(config_file, type, RepoPath):
    """
    Downloads prior GEM forecast data from the EC datamart so that hindcast
    meteorological data can be brought up to the current date before the
    hindcast is run. Currently only used for temperature data: the best way to
    get gridded temperature with good temporal resolution is the prior days'
    forecasts.

    Args:
      config_file: see class ConfigParse()
      type: string selecting the source; only 'GEMTemps' is supported
      RepoPath: directory where the hindcast r2c file and grib files are stored
    Returns:
      (lasttimestep, filename_nomenclature): datetime of the last available
      timestep and the file-name prefix used on the server
    Raises:
      ValueError: if 'type' is not a recognized source
      RuntimeError: if no files could be found locally or downloaded
    """
    # initialize some useful variables
    timeVar = FrameworkLibrary.getDateTime(hours=0)  # today at time = 0
    timestamp = timeVar.strftime("%Y%m%d%H")
    ScriptDir = config_file.scripts_directory

    # define server/model defaults
    if type == 'GEMTemps':
        url = 'http://dd.weather.gc.ca/model_gem_regional/10km/grib2/'
        filename_nomenclature = 'CMC_reg_TMP_TGL_2_ps10km_'
        forecast_periods = [0, 6, 12, 18]  # the forecast is produced 4 times a day
        time_periods = [0, 3]  # grab these forecast hours to stitch together
    else:
        raise ValueError('Source type is not defined. Only "GEMTemps" forecast/hindcast data can currently be downloaded')

    # the model data is only stored online for today and yesterday;
    # if this changes, these dates must be modified
    now = datetime.datetime.now()
    yesterday = now - datetime.timedelta(days=1)
    dates = [yesterday.strftime("%Y%m%d"), now.strftime("%Y%m%d")]

    # Download grib2 files from DataMart: for hours 000 & 003 of all four daily
    # forecasts, download any file that exists remotely but not locally.
    lastfile = None  # most recent file seen; previously this was left unbound if nothing was found
    for day in dates:
        for startperiod in forecast_periods:
            for starthour in time_periods:
                filename = (filename_nomenclature + day + str(startperiod).zfill(2)
                            + '_P' + str(starthour).zfill(3) + '.grib2')
                website = (url + str(startperiod).zfill(2) + '/'
                           + str(starthour).zfill(3) + '/' + filename)
                if os.path.exists(os.path.join(RepoPath, filename)):
                    # file already exists in the local directory
                    lastfile = filename
                else:
                    try:
                        urllib2.urlopen(website)  # check that the remote file can be opened
                        # use wget to actually download the file
                        os.system("wget -O " + os.path.join(RepoPath, filename) + " " + website)
                        lastfile = os.path.join(RepoPath, filename)
                    except urllib2.URLError:
                        pass  # remote file doesn't exist; skip it
    print("All of the files have been downloaded from:\n" + url)

    if lastfile is None:
        # previously this fell through to a NameError on 'lastfile'; fail clearly instead
        raise RuntimeError('No GEM files were found locally or on the datamart at ' + url)

    # parse the timestamp of the last file; a non-match raises NameError
    # intentionally so the user is forced to troubleshoot the mismatch
    pattern = filename_nomenclature + r"(\d+)_P(\d+).grib2"
    m = re.search(pattern, lastfile)
    if m:
        lasttimestring = m.groups()[0]
        forecast_hour = int(m.groups()[1])
        lasttimestep = (datetime.datetime.strptime(lasttimestring, "%Y%m%d%H")
                        + datetime.timedelta(hours=forecast_hour))
    return lasttimestep, filename_nomenclature
def Download_Datamart_ReAnalysisHindcast(config_file, type, RepoPath):
    """
    Downloads reanalysis (CaPA) data from the EC datamart so that hindcast
    meteorological data can be brought up to the current date before the
    hindcast is run.

    Args:
      config_file: see class ConfigParse()
      type: string selecting the reanalysis source; only 'CaPA' is supported
      RepoPath: directory where the reanalysis hindcast r2c file and the
                reanalysis grib files are stored
    Returns:
      (lasttimestep, grib_path_string): datetime parsed from the newest file on
      the server, and a strftime-style local path template for the grib files
    Raises:
      ValueError: if 'type' is not recognized
      RuntimeError: if the server listing contains no matching files
    """
    # initialize some useful variables
    timeVar = FrameworkLibrary.getDateTime(hours=0)  # today at time = 0
    timestamp = timeVar.strftime("%Y%m%d%H")
    ScriptDir = config_file.scripts_directory

    # define server names
    if type == 'CaPA':
        url = 'http://dd.weather.gc.ca/analysis/precip/rdpa/grib2/polar_stereographic/06/'
        filename_nomenclature = 'CMC_RDPA_APCP-006-0700cutoff_SFC_0_ps10km_'
    else:
        raise ValueError('Source type is not defined. Only "CaPA" hindcast data can currently be downloaded')

    # scrape the list of files from the server directory listing
    # http://stackoverflow.com/questions/10875215/python-urllib-downloading-contents-of-an-online-directory
    urlpath = urllib2.urlopen(url)
    server_data = urlpath.read()
    filename_pattern = re.compile('"(' + filename_nomenclature + r'.+\.grib2)"')
    filelist = filename_pattern.findall(server_data)
    if not filelist:
        # previously an empty listing crashed later with IndexError on filelist[-1]
        raise RuntimeError('No CaPA files found in the server listing at ' + url)

    print("Downloading grib files from DataMart...")
    # download every listed file that doesn't already exist locally
    # (the previous bare `except: pass` that silently hid all errors is removed)
    for name in filelist:
        if not os.path.exists(os.path.join(RepoPath, name)):
            os.system("wget -O " + os.path.join(RepoPath, name) + " " + url + "/" + name)
    print("All of the files have been downloaded from:\n" + url)

    # parse the timestamp of the newest (last listed) file; a non-match raises
    # NameError intentionally so the user must troubleshoot the mismatch
    pattern = filename_nomenclature + r"(\d+)(_\d+\.grib2)"
    m = re.search(pattern, filelist[-1])
    if m:
        lasttimestring = m.groups()[0]
        lasttimestep = datetime.datetime.strptime(lasttimestring, "%Y%m%d%H")
        suffix = m.groups()[1]
        grib_path_string = os.path.join(RepoPath, filename_nomenclature + "%Y%m%d%H" + suffix)
    return lasttimestep, grib_path_string
def repo_pull_nomads(repos, filePath, timestamp, repo_path):
    """
    Downloads ensemble forecast data from the NOMADS repository using wget.

    Args:
      repos: a single source block from the config file (list of token rows);
             note the substitution parameters (%X) in :FileName. Example:
               0:URL            http://nomads.ncep.noaa.gov/cgi-bin/
               1:FileName       filter_%S1.pl?file=%S2gep%E.t%Hz.pgrb2af%T&%query&subregion=&leftlon=-98&rightlon=-88&toplat=54&bottomlat=46&dir=%2F%S3.%Y%m%d%2F00%2Fpgrb2a
               2:DeltaTimeStart 6      3:DeltaTimeEnd 240   4:DeltaTimeStep 6
               5:StitchTimeStart 6     6:StitchTimeEnd 240
               7:Grouping tem          8:Type NOMAD_GFS
               9:Forecast 3            10:num_ensembles 20
      filePath: the filepath where the scripts are run
                ex) Q:\WR_Ensemble_dev\A_MS\Repo\scripts
      timestamp: datestamp + start hour (config date with a static '00' hour)
      repo_path: root directory for repo data (currently 'config_file.grib_forecast_repo')

    Website:
      http://nomads.ncep.noaa.gov/txt_descriptions/CMCENS_doc.shtml

    Returns:
      NULL - downloads grib files from online repository
    """
    # build repository directory to store this date's files
    today_repo_path = repo_path + "/" + timestamp + "/"
    FrameworkLibrary.build_dir(today_repo_path)

    # get arguments from repos; we assume that 2 NOMADS sources won't be
    # stitched together (as we do with the datamart GEM regional and global
    # models), hence only the first string of each row is used (ie. repos[*][0])
    url = repos[0][0]
    DeltaTimeStart = int(repos[2][0])
    DeltaTimeEnd = int(repos[3][0])
    DeltaTimeStep = int(repos[4][0])
    Source = repos[8][0]
    Grouping = repos[7][0]
    num_ensembles = int(repos[10][0])

    wget_list = []
    print('building list of files for download')
    for k in range(1, num_ensembles + 1):  # for each ensemble member
        # update the progress bar
        pbar = k / float(num_ensembles) * 40
        sys.stdout.write('\r')
        sys.stdout.write("[%-40s] %d%%" % ('=' * int(pbar), pbar / 40 * 100))
        sys.stdout.flush()

        for j in range(DeltaTimeStart // DeltaTimeStep, DeltaTimeEnd // DeltaTimeStep + 1):
            ensemble = str(k).zfill(2)  # ensemble number in 2-digit format
            DeltaTime = j * DeltaTimeStep
            # substitute the timestep and ensemble number into the file name
            name = repos[1][0].replace('%T', str(DeltaTime).zfill(2))
            name = name.replace('%E', ensemble)
            # substitute the data request (levels/variables) into the file name
            if Grouping == 'met':
                name = name.replace('%query', 'lev_surface=on&var_APCP=on')
            if Grouping == 'tem':
                # NOTE(review): 'var_TMP' lacks the '=on' suffix that the example
                # URLs show ('var_TMP=on') -- confirm against the NOMADS filter API
                name = name.replace('%query', 'lev_2_m_above_ground=on&var_TMP')
            # substitute the source model (ie. CMC NAEFS, or GFS NAEFS)
            if Source == 'NOMAD_GFS':
                name = name.replace('%S1', 'gens')
                name = name.replace('%S2', '')
                name = name.replace('%S3', 'gefs')
            if Source == 'NOMAD_CMC':
                name = name.replace('%S1', 'cmcens')
                name = name.replace('%S2', 'cmc_')
                name = name.replace('%S3', 'cmce')

            # concatenate and create the wget command
            downloadname = url + name
            filename = (Source + '_' + Grouping + '_' + ensemble + '_'
                        + str(DeltaTime).zfill(3) + '_' + timestamp + '.grib2')
            cmd = ("wget -q -O " + today_repo_path + filename + " "
                   + '"' + downloadname + '"' + " 2> NUL")
            # queue for download only if the file doesn't already exist locally
            if not os.path.isfile(today_repo_path + filename):
                wget_list.append(cmd)

    # run wget in multiple worker processes; this speeds up download time considerably
    print('\nDownloading Files... \n')
    pool = multiprocessing.Pool(processes=20)
    try:
        pool.map(os.system, wget_list)
    finally:
        # previously the pool was never closed/joined, leaking worker processes
        pool.close()
        pool.join()
def repo_pull_datamart(repos, filePath, timestamp, repo_path):
    """
    Downloads forecast data from the EC datamart (http://dd.weather.gc.ca/) using wget.

    Args:
      repos: a single source block from the config file; rows are indexed as
        [0] :URL             e.g. http://dd.weather.gc.ca/model_gem_regional/10km/grib2/%H/%T/
        [1] :FileName        e.g. CMC_reg_APCP_SFC_0_ps10km_%Y%m%d%H_P%T.grib2
        [2] :DeltaTimeStart  [3] :DeltaTimeEnd   [4] :DeltaTimeStep
        [5] :StitchTimeStart [6] :StitchTimeEnd
        [7] :Grouping        [8] :Type           [9] :Forecast
      filePath: the filepath where the scripts are run
                ex) Q:\WR_Ensemble_dev\A_MS\Repo\scripts
      timestamp: datestamp + start hour (config date with a static '00' hour)
      repo_path: root directory for repo data (currently 'config_file.grib_forecast_repo')

    Returns:
      NULL - downloads grib files from online repository
    """
    # build repository directory to store this date's files
    today_repo_path = repo_path + "/" + timestamp + "/"
    FrameworkLibrary.build_dir(today_repo_path)

    # for each of the 'series' being stitched together (typically 1 or 2)
    for i in range(len(repos[0])):
        DeltaTimeStart = int(repos[2][i])
        DeltaTimeEnd = int(repos[3][i])
        DeltaTimeStep = int(repos[4][i])
        first_step = DeltaTimeStart // DeltaTimeStep  # hoisted loop-invariant bounds
        last_step = DeltaTimeEnd // DeltaTimeStep
        # loop through the time series
        for j in range(first_step, last_step + 1):
            # update the progress bar
            pbar = (j + 1 - first_step) / float(last_step + 1 - first_step) * 40
            sys.stdout.write('\r')
            sys.stdout.write("[%-40s] %d%%" % ('=' * int(pbar), pbar / 40 * 100))
            sys.stdout.flush()

            DeltaTime = j * DeltaTimeStep
            # substitute %T (the forecast hour) into the URL and file name
            # (previously this clobbered the outer loop variable 'url')
            series_url = repos[0][i].replace('%T', str(DeltaTime).zfill(3))
            name = repos[1][i].replace('%T', str(DeltaTime).zfill(3))
            filename = series_url + name

            if not os.path.isfile(today_repo_path + name):  # skip files already held locally
                try:
                    urllib2.urlopen(filename)  # check that the remote file can be opened
                    # use wget to actually download the file
                    os.system("wget -q -O " + today_repo_path + name + " " + filename + " 2> NUL")
                except urllib2.URLError:
                    # remote file doesn't exist either; report and move on
                    print(" Error: File does not exist locally or remotely")
    print("\n")
def Download_Datamart_GEMHindcast(config_file, type, RepoPath):
    """
    Downloads prior GEM forecast data from the EC datamart so that hindcast
    meteorological data can be brought up to the current date before the
    hindcast is run. Currently only used for temperature data: the best way to
    get gridded temperature with good temporal resolution is the prior days'
    forecasts.

    Args:
      config_file: see class ConfigParse()
      type: string selecting the source; only 'GEMTemps' is supported
      RepoPath: directory where the hindcast r2c file and grib files are stored
    Returns:
      (lasttimestep, filename_nomenclature): datetime of the last available
      timestep and the file-name prefix used on the server
    Raises:
      ValueError: if 'type' is not a recognized source
      RuntimeError: if no files could be found locally or downloaded
    """
    # initialize some useful variables
    timeVar = FrameworkLibrary.getDateTime(hours=0)  # today at time = 0
    timestamp = timeVar.strftime("%Y%m%d%H")
    ScriptDir = config_file.scripts_directory

    # define server/model defaults
    if type == 'GEMTemps':
        url = 'http://dd.weather.gc.ca/model_gem_regional/10km/grib2/'
        filename_nomenclature = 'CMC_reg_TMP_TGL_2_ps10km_'
        forecast_periods = [0, 6, 12, 18]  # the forecast is produced 4 times a day
        time_periods = [0, 3]  # grab these forecast hours to stitch together
    else:
        raise ValueError('Source type is not defined. Only "GEMTemps" forecast/hindcast data can currently be downloaded')

    # the model data is only stored online for today and yesterday;
    # if this changes, these dates must be modified
    now = datetime.datetime.now()
    yesterday = now - datetime.timedelta(days=1)
    dates = [yesterday.strftime("%Y%m%d"), now.strftime("%Y%m%d")]

    # Download grib2 files from DataMart: for hours 000 & 003 of all four daily
    # forecasts, download any file that exists remotely but not locally.
    lastfile = None  # most recent file seen; previously this was left unbound if nothing was found
    for day in dates:
        for startperiod in forecast_periods:
            for starthour in time_periods:
                filename = (filename_nomenclature + day + str(startperiod).zfill(2)
                            + '_P' + str(starthour).zfill(3) + '.grib2')
                website = (url + str(startperiod).zfill(2) + '/'
                           + str(starthour).zfill(3) + '/' + filename)
                if os.path.exists(os.path.join(RepoPath, filename)):
                    # file already exists in the local directory
                    lastfile = filename
                else:
                    try:
                        urllib2.urlopen(website)  # check that the remote file can be opened
                        # use wget to actually download the file
                        os.system("wget -O " + os.path.join(RepoPath, filename) + " " + website)
                        lastfile = os.path.join(RepoPath, filename)
                    except urllib2.URLError:
                        pass  # remote file doesn't exist; skip it
    print("All of the files have been downloaded from:\n" + url)

    if lastfile is None:
        # previously this fell through to a NameError on 'lastfile'; fail clearly instead
        raise RuntimeError('No GEM files were found locally or on the datamart at ' + url)

    # parse the timestamp of the last file; a non-match raises NameError
    # intentionally so the user is forced to troubleshoot the mismatch
    pattern = filename_nomenclature + r"(\d+)_P(\d+).grib2"
    m = re.search(pattern, lastfile)
    if m:
        lasttimestring = m.groups()[0]
        forecast_hour = int(m.groups()[1])
        lasttimestep = (datetime.datetime.strptime(lasttimestring, "%Y%m%d%H")
                        + datetime.timedelta(hours=forecast_hour))
    return lasttimestep, filename_nomenclature
def Download_Datamart_ReAnalysisHindcast(config_file, type, RepoPath):
    """
    Downloads reanalysis (CaPA) data from the EC datamart so that hindcast
    meteorological data can be brought up to the current date before the
    hindcast is run.

    Args:
      config_file: see class ConfigParse()
      type: string selecting the reanalysis source; only 'CaPA' is supported
      RepoPath: directory where the reanalysis hindcast r2c file and the
                reanalysis grib files are stored
    Returns:
      (lasttimestep, grib_path_string): datetime parsed from the newest file on
      the server, and a strftime-style local path template for the grib files
    Raises:
      ValueError: if 'type' is not recognized
      RuntimeError: if the server listing contains no matching files
    """
    # initialize some useful variables
    timeVar = FrameworkLibrary.getDateTime(hours=0)  # today at time = 0
    timestamp = timeVar.strftime("%Y%m%d%H")
    ScriptDir = config_file.scripts_directory

    # define server names
    if type == 'CaPA':
        url = 'http://dd.weather.gc.ca/analysis/precip/rdpa/grib2/polar_stereographic/06/'
        filename_nomenclature = 'CMC_RDPA_APCP-006-0700cutoff_SFC_0_ps10km_'
    else:
        raise ValueError('Source type is not defined. Only "CaPA" hindcast data can currently be downloaded')

    # scrape the list of files from the server directory listing
    # http://stackoverflow.com/questions/10875215/python-urllib-downloading-contents-of-an-online-directory
    urlpath = urllib2.urlopen(url)
    server_data = urlpath.read()
    filename_pattern = re.compile('"(' + filename_nomenclature + r'.+\.grib2)"')
    filelist = filename_pattern.findall(server_data)
    if not filelist:
        # previously an empty listing crashed later with IndexError on filelist[-1]
        raise RuntimeError('No CaPA files found in the server listing at ' + url)

    print("Downloading grib files from DataMart...")
    # download every listed file that doesn't already exist locally
    # (the previous bare `except: pass` that silently hid all errors is removed)
    for name in filelist:
        if not os.path.exists(os.path.join(RepoPath, name)):
            os.system("wget -O " + os.path.join(RepoPath, name) + " " + url + "/" + name)
    print("All of the files have been downloaded from:\n" + url)

    # parse the timestamp of the newest (last listed) file; a non-match raises
    # NameError intentionally so the user must troubleshoot the mismatch
    pattern = filename_nomenclature + r"(\d+)(_\d+\.grib2)"
    m = re.search(pattern, filelist[-1])
    if m:
        lasttimestring = m.groups()[0]
        lasttimestep = datetime.datetime.strptime(lasttimestring, "%Y%m%d%H")
        suffix = m.groups()[1]
        grib_path_string = os.path.join(RepoPath, filename_nomenclature + "%Y%m%d%H" + suffix)
    return lasttimestep, grib_path_string
def repo_pull_nomads(repos, filePath, timestamp, repo_path):
    """
    Downloads ensemble forecast data from the NOMADS repository using wget.

    Args:
      repos: a single source block from the config file (list of token rows);
             note the substitution parameters (%X) in :FileName. Example:
               0:URL            http://nomads.ncep.noaa.gov/cgi-bin/
               1:FileName       filter_%S1.pl?file=%S2gep%E.t%Hz.pgrb2af%T&%query&subregion=&leftlon=-98&rightlon=-88&toplat=54&bottomlat=46&dir=%2F%S3.%Y%m%d%2F00%2Fpgrb2a
               2:DeltaTimeStart 6      3:DeltaTimeEnd 240   4:DeltaTimeStep 6
               5:StitchTimeStart 6     6:StitchTimeEnd 240
               7:Grouping tem          8:Type NOMAD_GFS
               9:Forecast 3            10:num_ensembles 20
      filePath: the filepath where the scripts are run
                ex) Q:\WR_Ensemble_dev\A_MS\Repo\scripts
      timestamp: datestamp + start hour (config date with a static '00' hour)
      repo_path: root directory for repo data (currently 'config_file.grib_forecast_repo')

    Website:
      http://nomads.ncep.noaa.gov/txt_descriptions/CMCENS_doc.shtml

    Returns:
      NULL - downloads grib files from online repository
    """
    # build repository directory to store this date's files
    today_repo_path = repo_path + "/" + timestamp + "/"
    FrameworkLibrary.build_dir(today_repo_path)

    # get arguments from repos; we assume that 2 NOMADS sources won't be
    # stitched together (as we do with the datamart GEM regional and global
    # models), hence only the first string of each row is used (ie. repos[*][0])
    url = repos[0][0]
    DeltaTimeStart = int(repos[2][0])
    DeltaTimeEnd = int(repos[3][0])
    DeltaTimeStep = int(repos[4][0])
    Source = repos[8][0]
    Grouping = repos[7][0]
    num_ensembles = int(repos[10][0])

    wget_list = []
    print('building list of files for download')
    for k in range(1, num_ensembles + 1):  # for each ensemble member
        # update the progress bar
        pbar = k / float(num_ensembles) * 40
        sys.stdout.write('\r')
        sys.stdout.write("[%-40s] %d%%" % ('=' * int(pbar), pbar / 40 * 100))
        sys.stdout.flush()

        for j in range(DeltaTimeStart // DeltaTimeStep, DeltaTimeEnd // DeltaTimeStep + 1):
            ensemble = str(k).zfill(2)  # ensemble number in 2-digit format
            DeltaTime = j * DeltaTimeStep
            # substitute the timestep and ensemble number into the file name
            name = repos[1][0].replace('%T', str(DeltaTime).zfill(2))
            name = name.replace('%E', ensemble)
            # substitute the data request (levels/variables) into the file name
            if Grouping == 'met':
                name = name.replace('%query', 'lev_surface=on&var_APCP=on')
            if Grouping == 'tem':
                # NOTE(review): 'var_TMP' lacks the '=on' suffix that the example
                # URLs show ('var_TMP=on') -- confirm against the NOMADS filter API
                name = name.replace('%query', 'lev_2_m_above_ground=on&var_TMP')
            # substitute the source model (ie. CMC NAEFS, or GFS NAEFS)
            if Source == 'NOMAD_GFS':
                name = name.replace('%S1', 'gens')
                name = name.replace('%S2', '')
                name = name.replace('%S3', 'gefs')
            if Source == 'NOMAD_CMC':
                name = name.replace('%S1', 'cmcens')
                name = name.replace('%S2', 'cmc_')
                name = name.replace('%S3', 'cmce')

            # concatenate and create the wget command
            downloadname = url + name
            filename = (Source + '_' + Grouping + '_' + ensemble + '_'
                        + str(DeltaTime).zfill(3) + '_' + timestamp + '.grib2')
            cmd = ("wget -q -O " + today_repo_path + filename + " "
                   + '"' + downloadname + '"' + " 2> NUL")
            # queue for download only if the file doesn't already exist locally
            if not os.path.isfile(today_repo_path + filename):
                wget_list.append(cmd)

    # run wget in multiple worker processes; this speeds up download time considerably
    print('\nDownloading Files... \n')
    pool = multiprocessing.Pool(processes=20)
    try:
        pool.map(os.system, wget_list)
    finally:
        # previously the pool was never closed/joined, leaking worker processes
        pool.close()
        pool.join()
def repo_pull_datamart(repos, filePath, timestamp, repo_path):
    """
    Downloads forecast data from the EC datamart (http://dd.weather.gc.ca/) using wget.

    Args:
      repos: a single source block from the config file; rows are indexed as
        [0] :URL             e.g. http://dd.weather.gc.ca/model_gem_regional/10km/grib2/%H/%T/
        [1] :FileName        e.g. CMC_reg_APCP_SFC_0_ps10km_%Y%m%d%H_P%T.grib2
        [2] :DeltaTimeStart  [3] :DeltaTimeEnd   [4] :DeltaTimeStep
        [5] :StitchTimeStart [6] :StitchTimeEnd
        [7] :Grouping        [8] :Type           [9] :Forecast
      filePath: the filepath where the scripts are run
                ex) Q:\WR_Ensemble_dev\A_MS\Repo\scripts
      timestamp: datestamp + start hour (config date with a static '00' hour)
      repo_path: root directory for repo data (currently 'config_file.grib_forecast_repo')

    Returns:
      NULL - downloads grib files from online repository
    """
    # build repository directory to store this date's files
    today_repo_path = repo_path + "/" + timestamp + "/"
    FrameworkLibrary.build_dir(today_repo_path)

    # for each of the 'series' being stitched together (typically 1 or 2)
    for i in range(len(repos[0])):
        DeltaTimeStart = int(repos[2][i])
        DeltaTimeEnd = int(repos[3][i])
        DeltaTimeStep = int(repos[4][i])
        first_step = DeltaTimeStart // DeltaTimeStep  # hoisted loop-invariant bounds
        last_step = DeltaTimeEnd // DeltaTimeStep
        # loop through the time series
        for j in range(first_step, last_step + 1):
            # update the progress bar
            pbar = (j + 1 - first_step) / float(last_step + 1 - first_step) * 40
            sys.stdout.write('\r')
            sys.stdout.write("[%-40s] %d%%" % ('=' * int(pbar), pbar / 40 * 100))
            sys.stdout.flush()

            DeltaTime = j * DeltaTimeStep
            # substitute %T (the forecast hour) into the URL and file name
            # (previously this clobbered the outer loop variable 'url')
            series_url = repos[0][i].replace('%T', str(DeltaTime).zfill(3))
            name = repos[1][i].replace('%T', str(DeltaTime).zfill(3))
            filename = series_url + name

            if not os.path.isfile(today_repo_path + name):  # skip files already held locally
                try:
                    urllib2.urlopen(filename)  # check that the remote file can be opened
                    # use wget to actually download the file
                    os.system("wget -q -O " + today_repo_path + name + " " + filename + " 2> NUL")
                except urllib2.URLError:
                    # remote file doesn't exist either; report and move on
                    print(" Error: File does not exist locally or remotely")
    print("\n")