def Download_Datamart_GEMHindcast(config_file, type, RepoPath):
    """
    Downloads GEM regional forecast grib2 files from the EC datamart.

    For yesterday and today, and for each of the four daily model runs, the
    000 and 003 hour files are fetched into RepoPath unless a file with the
    same name is already there. This function only downloads; it does not
    convert or append anything.

    Args:
        config_file: see class ConfigParse(); not read by this function, kept
            so that the call signature stays the same
        type: string selecting the data source; 'GEMTemps' is the only
            supported value
        RepoPath: directory where the grib2 files are stored

    Returns:
        (lasttimestep, filename_nomenclature)
        lasttimestep: datetime (model run time + forecast hour) of the last
            file in the download sequence that exists locally or was
            downloaded successfully
        filename_nomenclature: filename prefix shared by the grib2 files

    Raises:
        ValueError: if type is not 'GEMTemps'
        RuntimeError: if no file was found locally and none could be downloaded
        OSError: if the wget executable cannot be launched
    """
    #NOTE(review): this file contains a second, identical definition of this
    #function further down; the later definition is the one that takes effect.
    import subprocess #local import so the block does not depend on a file-level import

    #define server/model defaults
    if type == 'GEMTemps':
        url = 'http://dd.weather.gc.ca/model_gem_regional/10km/grib2/'
        filename_nomenclature = 'CMC_reg_TMP_TGL_2_ps10km_'
        forecast_periods = [0, 6, 12, 18] #the forecast is produced 4 times a day
        time_periods = [0, 3] #want to grab these hours to stitch together
    else:
        raise ValueError('Source type is not defined. \nOnly "GEMTemps" forecast/hindcast data can currently be downloaded')

    #the model data is only stored online for today and yesterday
    #if this changes, then you will need to modify the dates
    now = datetime.datetime.now()
    yesterday = now - datetime.timedelta(days=1)
    dates = [yesterday.strftime("%Y%m%d"), now.strftime("%Y%m%d")]

    #Download grib2 files from DataMart
    #(hours 000 & 003 for all four forecasts of each day)
    lasttimestep = None
    for day in dates:
        for startperiod in forecast_periods:
            for starthour in time_periods:
                run = str(startperiod).zfill(2)
                lead = str(starthour).zfill(3)
                filename = filename_nomenclature + day + run + '_P' + lead + '.grib2'
                website = url + run + '/' + lead + '/' + filename
                localfile = os.path.join(RepoPath, filename)
                #valid time of this file = model run time + forecast hour
                filetime = datetime.datetime.strptime(day + run, "%Y%m%d%H") + datetime.timedelta(hours=starthour)

                if os.path.exists(localfile): #already in the local directory
                    lasttimestep = filetime
                    continue

                #only attempt the download if the remote file can be opened
                try:
                    remote = urllib2.urlopen(website)
                except urllib2.URLError:
                    continue #remote file doesn't exist (yet), skip it
                remote.close()

                #argument list instead of a shell string so paths with spaces are safe
                if subprocess.call(["wget", "-O", localfile, website]) == 0:
                    lasttimestep = filetime
                else:
                    print("Warning: wget could not download " + website)
                    #wget -O creates the output file even on failure; remove the
                    #stub so that the next run retries instead of skipping it
                    if os.path.exists(localfile):
                        os.remove(localfile)

    print("All of the files have been downloaded from:\n" + url)

    #an error here is intentional because the user must troubleshoot it
    if lasttimestep is None:
        raise RuntimeError('No "' + filename_nomenclature + '" grib2 file was found in ' + str(RepoPath) + ' or could be downloaded from ' + url)

    return lasttimestep, filename_nomenclature
def Download_Datamart_GEMHindcast(config_file, type, RepoPath):
    """
    Downloads GEM regional forecast grib2 files from the EC datamart.

    For yesterday and today, and for each of the four daily model runs, the
    000 and 003 hour files are fetched into RepoPath unless a file with the
    same name is already there. This function only downloads; it does not
    convert or append anything.

    Args:
        config_file: see class ConfigParse(); not read by this function, kept
            so that the call signature stays the same
        type: string selecting the data source; 'GEMTemps' is the only
            supported value
        RepoPath: directory where the grib2 files are stored

    Returns:
        (lasttimestep, filename_nomenclature)
        lasttimestep: datetime (model run time + forecast hour) of the last
            file in the download sequence that exists locally or was
            downloaded successfully
        filename_nomenclature: filename prefix shared by the grib2 files

    Raises:
        ValueError: if type is not 'GEMTemps'
        RuntimeError: if no file was found locally and none could be downloaded
        OSError: if the wget executable cannot be launched
    """
    #NOTE(review): this file contains an earlier, identical definition of this
    #function; this later definition is the one that takes effect.
    import subprocess #local import so the block does not depend on a file-level import

    #define server/model defaults
    if type == 'GEMTemps':
        url = 'http://dd.weather.gc.ca/model_gem_regional/10km/grib2/'
        filename_nomenclature = 'CMC_reg_TMP_TGL_2_ps10km_'
        forecast_periods = [0, 6, 12, 18] #the forecast is produced 4 times a day
        time_periods = [0, 3] #want to grab these hours to stitch together
    else:
        raise ValueError('Source type is not defined. \nOnly "GEMTemps" forecast/hindcast data can currently be downloaded')

    #the model data is only stored online for today and yesterday
    #if this changes, then you will need to modify the dates
    now = datetime.datetime.now()
    yesterday = now - datetime.timedelta(days=1)
    dates = [yesterday.strftime("%Y%m%d"), now.strftime("%Y%m%d")]

    #Download grib2 files from DataMart
    #(hours 000 & 003 for all four forecasts of each day)
    lasttimestep = None
    for day in dates:
        for startperiod in forecast_periods:
            for starthour in time_periods:
                run = str(startperiod).zfill(2)
                lead = str(starthour).zfill(3)
                filename = filename_nomenclature + day + run + '_P' + lead + '.grib2'
                website = url + run + '/' + lead + '/' + filename
                localfile = os.path.join(RepoPath, filename)
                #valid time of this file = model run time + forecast hour
                filetime = datetime.datetime.strptime(day + run, "%Y%m%d%H") + datetime.timedelta(hours=starthour)

                if os.path.exists(localfile): #already in the local directory
                    lasttimestep = filetime
                    continue

                #only attempt the download if the remote file can be opened
                try:
                    remote = urllib2.urlopen(website)
                except urllib2.URLError:
                    continue #remote file doesn't exist (yet), skip it
                remote.close()

                #argument list instead of a shell string so paths with spaces are safe
                if subprocess.call(["wget", "-O", localfile, website]) == 0:
                    lasttimestep = filetime
                else:
                    print("Warning: wget could not download " + website)
                    #wget -O creates the output file even on failure; remove the
                    #stub so that the next run retries instead of skipping it
                    if os.path.exists(localfile):
                        os.remove(localfile)

    print("All of the files have been downloaded from:\n" + url)

    #an error here is intentional because the user must troubleshoot it
    if lasttimestep is None:
        raise RuntimeError('No "' + filename_nomenclature + '" grib2 file was found in ' + str(RepoPath) + ' or could be downloaded from ' + url)

    return lasttimestep, filename_nomenclature
def Download_Datamart_ReAnalysisHindcast(config_file, type, RepoPath):
    """
    Downloads reanalysis grib2 files from the EC datamart.

    The server directory listing is read, and every listed file whose name
    starts with the reanalysis prefix is fetched into RepoPath unless a file
    with the same name is already there. This function only downloads; it
    does not convert or append anything.

    Args:
        config_file: see class ConfigParse(); not read by this function, kept
            so that the call signature stays the same
        type: string to determine type of ReAnalysis Data; 'CaPA' is the only
            supported value
        RepoPath: directory where the reanalysis grib files are stored

    Returns:
        (lasttimestep, grib_path_string)
        lasttimestep: datetime parsed from the name of the last file in the
            server listing
        grib_path_string: path template inside RepoPath with a literal
            "%Y%m%d%H" placeholder where the timestamp goes

    Raises:
        ValueError: if type is not 'CaPA'
        RuntimeError: if the server listing has no matching files, or the
            last filename does not contain a parsable timestamp
        OSError: if the wget executable cannot be launched
    """
    #NOTE(review): this file contains a second, identical definition of this
    #function further down; the later definition is the one that takes effect.
    import subprocess #local import so the block does not depend on a file-level import

    #define server names
    if type == 'CaPA':
        url = 'http://dd.weather.gc.ca/analysis/precip/rdpa/grib2/polar_stereographic/06/'
        filename_nomenclature = 'CMC_RDPA_APCP-006-0700cutoff_SFC_0_ps10km_'
    else:
        raise ValueError('Source type is not defined. Only "CaPA" hindcast data can currently be downloaded')

    #get list of files on the server
    #http://stackoverflow.com/questions/10875215/python-urllib-downloading-contents-of-an-online-directory
    urlpath = urllib2.urlopen(url)
    try:
        server_data = urlpath.read()
    finally:
        urlpath.close()

    #a name is a quoted token with the expected prefix; quotes and slashes are
    #excluded so one match cannot span two links or point outside RepoPath
    filename_pattern = re.compile('"(' + re.escape(filename_nomenclature) + r'[^"/]+\.grib2)"')
    filelist = filename_pattern.findall(server_data)

    print("Downloading grib files from DataMart...")

    #for all the files on the datamart
    for name in filelist:
        localfile = os.path.join(RepoPath, name)
        if os.path.exists(localfile): #already downloaded
            continue
        #url already ends with '/'; an argument list (not a shell string) keeps
        #server-supplied names away from the shell
        if subprocess.call(["wget", "-O", localfile, url + name]) != 0:
            print("Warning: wget could not download " + url + name)
            #wget -O creates the output file even on failure; remove the stub
            #so that the next run retries instead of skipping it
            if os.path.exists(localfile):
                os.remove(localfile)

    print("All of the files have been downloaded from:\n" + url)

    #get the timestamp of the last file
    #an error here is intentional because the user must troubleshoot it
    if not filelist:
        raise RuntimeError('No "' + filename_nomenclature + '" grib2 files are listed at ' + url)

    pattern = re.escape(filename_nomenclature) + r"(\d+)(_\d+\.grib2)"
    m = re.search(pattern, filelist[-1]) #only look at the last file in the list
    if m is None:
        raise RuntimeError('Could not read a timestamp from the file name "' + filelist[-1] + '"')

    lasttimestring, suffix = m.groups()
    lasttimestep = datetime.datetime.strptime(lasttimestring, "%Y%m%d%H")
    grib_path_string = os.path.join(RepoPath, filename_nomenclature + "%Y%m%d%H" + suffix)

    return lasttimestep, grib_path_string
def Download_Datamart_ReAnalysisHindcast(config_file, type, RepoPath):
    """
    Downloads reanalysis grib2 files from the EC datamart.

    The server directory listing is read, and every listed file whose name
    starts with the reanalysis prefix is fetched into RepoPath unless a file
    with the same name is already there. This function only downloads; it
    does not convert or append anything.

    Args:
        config_file: see class ConfigParse(); not read by this function, kept
            so that the call signature stays the same
        type: string to determine type of ReAnalysis Data; 'CaPA' is the only
            supported value
        RepoPath: directory where the reanalysis grib files are stored

    Returns:
        (lasttimestep, grib_path_string)
        lasttimestep: datetime parsed from the name of the last file in the
            server listing
        grib_path_string: path template inside RepoPath with a literal
            "%Y%m%d%H" placeholder where the timestamp goes

    Raises:
        ValueError: if type is not 'CaPA'
        RuntimeError: if the server listing has no matching files, or the
            last filename does not contain a parsable timestamp
        OSError: if the wget executable cannot be launched
    """
    #NOTE(review): this file contains an earlier, identical definition of this
    #function; this later definition is the one that takes effect.
    import subprocess #local import so the block does not depend on a file-level import

    #define server names
    if type == 'CaPA':
        url = 'http://dd.weather.gc.ca/analysis/precip/rdpa/grib2/polar_stereographic/06/'
        filename_nomenclature = 'CMC_RDPA_APCP-006-0700cutoff_SFC_0_ps10km_'
    else:
        raise ValueError('Source type is not defined. Only "CaPA" hindcast data can currently be downloaded')

    #get list of files on the server
    #http://stackoverflow.com/questions/10875215/python-urllib-downloading-contents-of-an-online-directory
    urlpath = urllib2.urlopen(url)
    try:
        server_data = urlpath.read()
    finally:
        urlpath.close()

    #a name is a quoted token with the expected prefix; quotes and slashes are
    #excluded so one match cannot span two links or point outside RepoPath
    filename_pattern = re.compile('"(' + re.escape(filename_nomenclature) + r'[^"/]+\.grib2)"')
    filelist = filename_pattern.findall(server_data)

    print("Downloading grib files from DataMart...")

    #for all the files on the datamart
    for name in filelist:
        localfile = os.path.join(RepoPath, name)
        if os.path.exists(localfile): #already downloaded
            continue
        #url already ends with '/'; an argument list (not a shell string) keeps
        #server-supplied names away from the shell
        if subprocess.call(["wget", "-O", localfile, url + name]) != 0:
            print("Warning: wget could not download " + url + name)
            #wget -O creates the output file even on failure; remove the stub
            #so that the next run retries instead of skipping it
            if os.path.exists(localfile):
                os.remove(localfile)

    print("All of the files have been downloaded from:\n" + url)

    #get the timestamp of the last file
    #an error here is intentional because the user must troubleshoot it
    if not filelist:
        raise RuntimeError('No "' + filename_nomenclature + '" grib2 files are listed at ' + url)

    pattern = re.escape(filename_nomenclature) + r"(\d+)(_\d+\.grib2)"
    m = re.search(pattern, filelist[-1]) #only look at the last file in the list
    if m is None:
        raise RuntimeError('Could not read a timestamp from the file name "' + filelist[-1] + '"')

    lasttimestring, suffix = m.groups()
    lasttimestep = datetime.datetime.strptime(lasttimestring, "%Y%m%d%H")
    grib_path_string = os.path.join(RepoPath, filename_nomenclature + "%Y%m%d%H" + suffix)

    return lasttimestep, grib_path_string