def repo_pull_nomads(repos, filePath, timestamp, repo_path):
    r"""
    Downloads forecast data from the NOMADS repository using wget.

    One wget command is built per ensemble member and per forecast timestep.
    Commands whose target file is already present in the day's repository
    directory are skipped; the remaining commands are run in parallel.

    Args:
        repos: the source data in a single source from the config file, see below for example.
               Only the first entry of each field (ie. repos[*][0]) is used.
        filePath: the filepath where the scripts are run ex) Q:\WR_Ensemble_dev\A_MS\Repo\scripts
                  (not used by this function; kept so the signature matches the other repo_pull_* functions)
        timestamp: datestamp + start hour, this is currently the config.file date with a static start hour of '00'
        repo_path: path to store all the repo data; currently 'config_file.grib_forecast_repo'

    Website:
        http://nomads.ncep.noaa.gov/txt_descriptions/CMCENS_doc.shtml
        http://nomads.ncep.noaa.gov/cgi-bin/filter_cmcens.pl?file=cmc_gep00.t00z.pgrb2af384&lev_surface=on&var_APCP=on&var_TMP=on&subregion=&leftlon=-98&rightlon=-88&toplat=54&bottomlat=46&dir=%2Fcmce.20160830%2F00%2Fpgrb2a
        http://nomads.ncep.noaa.gov/cgi-bin/filter_gens.pl?file=gec00.t00z.pgrb2anl&lev_2_m_above_ground=on&lev_surface=on&var_TMP=on&subregion=&leftlon=-98&rightlon=-88&toplat=54&bottomlat=46&dir=%2Fgefs.20160830%2F00%2Fpgrb2

    Example repos from config file, note substitution parameters (%X) in :FileName
        :SourceData
        0:URL                http://nomads.ncep.noaa.gov/cgi-bin/
        1:FileName           filter_%S1.pl?file=%S2gep%E.t%Hz.pgrb2af%T&%query&subregion=&leftlon=-98&rightlon=-88&toplat=54&bottomlat=46&dir=%2F%S3.%Y%m%d%2F00%2Fpgrb2a
        2:DeltaTimeStart     6
        3:DeltaTimeEnd       240
        4:DeltaTimeStep      6
        5:StitchTimeStart    6
        6:StitchTimeEnd      240
        7:Grouping           tem
        8:Type               NOMAD_GFS
        9:Forecast           3
        10:num_ensembles     20
        :EndSourceData

    Only %T, %E, %query and %S1/%S2/%S3 are substituted here. The date/hour
    tokens (%Y%m%d, %H) are left untouched by this function - presumably they
    are filled in by the caller beforehand; TODO confirm.

    Returns:
        None - downloads grib files from online repository. Files are saved as
        <Type>_<Grouping>_<ensemble>_<hour>_<timestamp>.grib2
    """
    #build repository directory to store the date's files
    today_repo_path = repo_path + "/" + timestamp + "/"
    FrameworkLibrary.build_dir(today_repo_path)

    #get arguments from repos, we assume that 2 NOMADS sources won't be stitched together
    #(as we do with the datamart GEM regional and global model), hence only grab the first string (ie. repos[*][0])
    url = repos[0][0]
    delta_time_start = int(repos[2][0])
    delta_time_end = int(repos[3][0])
    delta_time_step = int(repos[4][0])
    source = repos[8][0]
    grouping = repos[7][0]
    num_ensembles = int(repos[10][0])

    #the data request and the source tokens do not depend on the ensemble member
    #or the timestep, so substitute them into the file name template only once
    query_by_grouping = {
        'met': 'lev_surface=on&var_APCP=on',
        #every filter switch needs '=on' (see the example URLs in the docstring)
        'tem': 'lev_2_m_above_ground=on&var_TMP=on',
    }
    #(%S1, %S2, %S3) values per source (ie. CMC NAEFS, or GFS NAEFS)
    tokens_by_source = {
        'NOMAD_GFS': ('gens', '', 'gefs'),
        'NOMAD_CMC': ('cmcens', 'cmc_', 'cmce'),
    }
    name_template = repos[1][0]
    if grouping in query_by_grouping:
        name_template = name_template.replace('%query', query_by_grouping[grouping])
    if source in tokens_by_source:
        for token, value in zip(('%S1', '%S2', '%S3'), tokens_by_source[source]):
            name_template = name_template.replace(token, value)

    # '//' keeps the integer (floor) division the timestep indices rely on
    first_step = delta_time_start // delta_time_step
    last_step = delta_time_end // delta_time_step

    wget_list = []
    print('building list of files for download')
    for member in range(1, num_ensembles + 1): #for each ensemble member
        #set progress bar (40 characters wide)
        pbar = member / float(num_ensembles) * 40
        sys.stdout.write('\r')
        sys.stdout.write("[%-40s] %d%%" % ('=' * int(pbar), pbar / 40 * 100))
        sys.stdout.flush()

        ensemble = str(member).zfill(2) #save ensemble number in 2 digit format

        for step in range(first_step, last_step + 1): #for each timestep
            delta_time = step * delta_time_step

            #remote name: forecast hour is padded to at least 2 digits
            name = name_template.replace('%T', str(delta_time).zfill(2))
            name = name.replace('%E', ensemble)
            download_url = url + name

            #local name: forecast hour is padded to 3 digits
            filename = (source + '_' + grouping + '_' + ensemble + '_'
                        + str(delta_time).zfill(3) + '_' + timestamp + '.grib2')
            local_path = today_repo_path + filename

            #only queue the download if the file doesn't exist locally
            if os.path.isfile(local_path):
                continue

            #quote the path and the URL so spaces and '&' survive the shell;
            #os.devnull is 'nul' on Windows and '/dev/null' elsewhere
            wget_list.append('wget -q -O "' + local_path + '" "' + download_url
                             + '" 2> ' + os.devnull)

    print('\nDownloading Files... \n')
    if not wget_list:
        #nothing to download, don't spin up worker processes for no reason
        return

    #now run wget in parallel, this speeds up download time considerably
    pool = multiprocessing.Pool(processes=min(20, len(wget_list)))
    try:
        pool.map(os.system, wget_list)
    finally:
        #always release the worker processes, even if a download task fails
        pool.close()
        pool.join()
def repo_pull_datamart(repos,filePath,timestamp,repo_path):
    r"""
    Downloads forecast data from EC datamart repository using wget
    http://dd.weather.gc.ca/

    For every series in the source and every forecast timestep, the remote
    file is first probed with urllib2; if it can be opened and is not already
    present in the day's repository directory, it is downloaded with wget.

    Args:
        repos: the source data in a single source from the config file, see below for example
        filePath: the filepath where the scripts are run ex) Q:\WR_Ensemble_dev\A_MS\Repo\scripts
                  (not used by this function)
        timestamp: datestamp + start hour, this is currently the config.file date with a static start hour of '00'
        repo_path: path to store all the repo data; currently 'config_file.grib_forecast_repo'

    Example repos (one column per series that is stitched together):
        [0] :URL                http://dd.weather.gc.ca/model_gem_regional/10km/grib2/%H/%T/      http://dd.weather.gc.ca/model_gem_global/25km/grib2/lat_lon/%H/%T/
        [1] :FileName           CMC_reg_APCP_SFC_0_ps10km_%Y%m%d%H_P%T.grib2                      CMC_glb_APCP_SFC_0_latlon.24x.24_%Y%m%d%H_P%T.grib2
        [2] :DeltaTimeStart     3                                                                 3
        [3] :DeltaTimeEnd       48                                                                240
        [4] :DeltaTimeStep      3                                                                 3
        [5] :StitchTimeStart    3                                                                 48
        [6] :StitchTimeEnd      48                                                                240
        [7] :Grouping           met                                                               met
        [8] :Type               GEM                                                               GEM
        [9] :Forecast           1                                                                 1

    Only %T is substituted here (3 digit forecast hour). The date/hour tokens
    (%Y%m%d, %H) are left untouched by this function - presumably they are
    filled in by the caller beforehand; TODO confirm.

    Returns:
        None - downloads grib files from online repository
    """
    #build repository directory to store the date's files
    today_repo_path = repo_path + "/" + timestamp + "/"
    FrameworkLibrary.build_dir(today_repo_path)

    #for each of the 'series' that are being stitched together (typically 1 or 2)
    for i, url_template in enumerate(repos[0]):
        name_template = repos[1][i]
        delta_time_start = int(repos[2][i])
        delta_time_end = int(repos[3][i])
        delta_time_step = int(repos[4][i])

        # '//' keeps the integer (floor) division the timestep indices rely on
        first_step = delta_time_start // delta_time_step
        last_step = delta_time_end // delta_time_step
        total_steps = last_step + 1 - first_step

        #loop through the time series
        for step in range(first_step, last_step + 1):
            #set progress bar (40 characters wide)
            pbar = (step + 1 - first_step) / float(total_steps) * 40
            sys.stdout.write('\r')
            sys.stdout.write("[%-40s] %d%%" % ('=' * int(pbar), pbar / 40 * 100))
            sys.stdout.flush()

            #replace %T with the deltaT
            hour = str(step * delta_time_step).zfill(3)
            name = name_template.replace('%T', hour)
            remote_url = url_template.replace('%T', hour) + name
            local_path = today_repo_path + name

            #nothing to do if the file already exists locally
            if os.path.isfile(local_path):
                continue

            try:
                #probe the remote file first so wget is only run when it exists
                response = urllib2.urlopen(remote_url)
            except urllib2.URLError:
                #do nothing if remote file doesn't exist
                print(" Error: File does not exist locally or remotely")
            else:
                #the probe is only an existence check, release its connection
                response.close()
                #use wget to actually download the file; quote the path and URL
                #for the shell, os.devnull is 'nul' on Windows and '/dev/null' elsewhere
                os.system('wget -q -O "' + local_path + '" "' + remote_url
                          + '" 2> ' + os.devnull)

    #finish the progress bar line
    print("\n")