Example #1
# Module-level imports needed by these examples (FrameworkLibrary is project-specific)
import os
import re
import datetime
import urllib2

import FrameworkLibrary


def Download_Datamart_GEMHindcast(config_file, type, RepoPath):
    """
    Downloads forecast data from the EC datamart so that it can be converted and appended to existing data.
    This is meant to update hindcast meteorological data up to the current date before the hindcast is run.
    It is currently only used for temperature data; at present the best way to get gridded temperature data
    with good temporal resolution is to use the prior day's forecasts.

    Args:
        config_file: see class ConfigParse()
        type: string identifying the data source; currently 'GEMTemps' is the only supported type
        RepoPath: directory where the hindcast r2c file and the downloaded grib2 files are stored
    Returns:
        (lasttimestep, filename_nomenclature) - the datetime of the last available forecast hour
        and the file-name prefix of the downloaded grib2 files
    """
    
    #Initialize some useful variables
    timeVar = FrameworkLibrary.getDateTime(hours = 0) #get today at time = 0
    timestamp = timeVar.strftime("%Y%m%d%H")
    ScriptDir = config_file.scripts_directory


    #define server/model defaults
    if type == 'GEMTemps':
        url = 'http://dd.weather.gc.ca/model_gem_regional/10km/grib2/'
        filename_nomenclature = 'CMC_reg_TMP_TGL_2_ps10km_'
        forecast_periods = [0, 6, 12, 18] #the forecast is issued 4 times a day (00, 06, 12 and 18 UTC)
        time_periods = [0, 3] #grab the P000 and P003 hours from each forecast to stitch together a continuous record
    else:
        raise ValueError('Source type is not defined. Only "GEMTemps" forecast/hindcast data can currently be downloaded')
        
        
    #the model data is only stored online for today and yesterday
    #if this changes, then you will need to modify the dates
    now = datetime.datetime.now()
    yesterday = now - datetime.timedelta(days=1)

    now_datestamp = now.strftime("%Y%m%d")
    yesterday_datestamp = yesterday.strftime("%Y%m%d")

    dates = [yesterday_datestamp,now_datestamp]
    

    #Download grib2 files from DataMart ****************************************************** 
    #For each of the four daily forecasts, download hours 000 and 003 whenever an online version exists and no local copy does
    for day in dates:
        for startperiod in forecast_periods:
            for starthour in time_periods:
            
                filename = filename_nomenclature + day + str(startperiod).zfill(2) +'_P' + str(starthour).zfill(3) + '.grib2'  
                website = url + str(startperiod).zfill(2) + '/' + str(starthour).zfill(3) + '/' + filename
          
                if os.path.exists(os.path.join(RepoPath,filename)): #file already exists in the local directory
                    lastfile = os.path.join(RepoPath,filename)

                else:
                    try: #download only if the remote file exists
                        urllib2.urlopen(website) #raises URLError if the remote file cannot be opened
                        os.system("wget -O " + os.path.join(RepoPath,filename) + " " + website) #use wget to actually download the file
                        lastfile = os.path.join(RepoPath,filename)
                    except urllib2.URLError: #skip this forecast hour if the remote file doesn't exist
                        pass
            
            
    print "All of the files have been downloaded from:\n" + url

    
    #get the timestamp of the last file
    pattern = filename_nomenclature + r"(\d+)_P(\d+)\.grib2"

    m = re.search(pattern,lastfile)
    if m:
        lasttimestring = m.groups()[0]
        forecast_hour = int(m.groups()[1])
        lasttimestep = datetime.datetime.strptime(lasttimestring,"%Y%m%d%H") + datetime.timedelta(hours=forecast_hour)

    #Return the values only if the pattern was found; if it didn't match, the resulting NameError
    #is intentional because the user must troubleshoot the downloaded files when that occurs
    return lasttimestep, filename_nomenclature
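
The function only returns the last available timestep and the grib2 file-name prefix; converting the files and appending them to the r2c hindcast is handled elsewhere in the framework. Below is a minimal, hypothetical usage sketch: the _Cfg stand-in, the repository path and the printed messages are illustrative assumptions, not names taken from the framework (a real caller would pass a full ConfigParse() object).

# Hypothetical caller -- every name below except the function itself is illustrative.
class _Cfg(object):                       # minimal stand-in for ConfigParse(); the function
    scripts_directory = "../scripts"      # only reads the scripts_directory attribute

repo = "C:/WR_Ensemble/Repo/GEMTemps"     # assumed repository directory for the grib2 files

last_step, prefix = Download_Datamart_GEMHindcast(_Cfg(), 'GEMTemps', repo)

# last_step already includes the P-hour offset, so it marks how far the temperature
# hindcast can be extended once the downloaded grib2 files are converted and appended
print "GEM temperature data is available up to " + last_step.strftime("%Y/%m/%d %H:00")
print "Downloaded files share the prefix: " + prefix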
Example #2
def Download_Datamart_ReAnalysisHindcast(config_file, type, RepoPath):
    """
    Downloads reanalysis data from the EC datamart so that it can be converted and appended to existing data.
    This is meant to update hindcast meteorological data up to the current date before the hindcast is run.
    It is currently only used for CaPA data.
    
    Args:
        config_file: see class ConfigParse()
        type: string to determine type of ReAnalysis Data; currently CaPA is the only reanalysis that exists
        RepoPath: directory where the reanalysis hindcast r2c file is stored and the reanalysis grib files are stored
    Returns:
        (lasttimestep, grib_path_string) - the datetime of the last available analysis and a
        strftime-style template for the local grib2 file paths
    """
    
    #Initialize some useful variables
    timeVar = FrameworkLibrary.getDateTime(hours = 0) #get today at time = 0
    timestamp = timeVar.strftime("%Y%m%d%H")
    ScriptDir = config_file.scripts_directory


    #define server names
    if type == 'CaPA':
        url = 'http://dd.weather.gc.ca/analysis/precip/rdpa/grib2/polar_stereographic/06/'
        filename_nomenclature = 'CMC_RDPA_APCP-006-0700cutoff_SFC_0_ps10km_'
        
                
    else:
        raise ValueError('Source type is not defined. Only "CaPA" hindcast data can currently be downloaded')
    

    #get list of files on the server
    #http://stackoverflow.com/questions/10875215/python-urllib-downloading-contents-of-an-online-directory
    urlpath = urllib2.urlopen(url)
    server_data = urlpath.read()
    filename_pattern = re.compile('"(' + filename_nomenclature + r'.+\.grib2)"')
    filelist = filename_pattern.findall(server_data)
    
    
    print "Downloading grib files from DataMart..."
    #for all the files on the datamart
    for name in filelist:
        try:
            if not os.path.exists(os.path.join(RepoPath, name)): #if the file doesn't exist locally, then download it
                os.system("wget -O " + os.path.join(RepoPath, name) + " " + url + name) #url already ends with '/'
        except Exception: #skip any file that cannot be downloaded
            pass
    print "All of the files have been downloaded from:\n" + url
    
    #get the timestamp of the last file
    pattern = filename_nomenclature + r"(\d+)(_\d+\.grib2)"

    m = re.search(pattern,filelist[-1]) #only look at the last file in the list
    if m:
        lasttimestring = m.groups()[0]
        lasttimestep = datetime.datetime.strptime(lasttimestring,"%Y%m%d%H")
        
        suffix = m.groups()[1]
        grib_path_string = os.path.join(RepoPath,filename_nomenclature + "%Y%m%d%H" + suffix)

    #Return the values only if the pattern was found; if it didn't match, the resulting NameError
    #is intentional because the user must troubleshoot the downloaded files when that occurs
    return lasttimestep, grib_path_string
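
Because grib_path_string keeps its strftime placeholders, the caller can rebuild the local path of any downloaded CaPA file for a given analysis time. A minimal, hypothetical sketch of that pattern follows; as above, the _Cfg stand-in and the repository path are illustrative assumptions rather than names from the framework, and the imports from Example #1 are assumed.

# Hypothetical caller -- every name below except the function itself is illustrative.
class _Cfg(object):                        # minimal stand-in for ConfigParse()
    scripts_directory = "../scripts"

repo = "C:/WR_Ensemble/Repo/CaPA"          # assumed repository directory for the grib2 files

last_step, path_template = Download_Datamart_ReAnalysisHindcast(_Cfg(), 'CaPA', repo)

# path_template embeds %Y%m%d%H, so strftime on any analysis time rebuilds that file's path
previous_step = last_step - datetime.timedelta(hours=6)   # CaPA fields arrive every 6 hours
print "Newest CaPA grib2 file:   " + last_step.strftime(path_template)
print "Previous 6-hour analysis: " + previous_step.strftime(path_template)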