def Run_Framework():
    #get arguments
    class args(object):
        pass

    data = args()
    parser = argparse.ArgumentParser()
    parser.add_argument('-c',
                        '--Config',
                        help='Full path to the configuration file.')
    parser.add_argument(
        '-m',
        '--ModelRun',
        help='Type of model run: Spinup,DefaultHindcast,HindcastAdjust,Forecast'
    )
    parser.parse_args(namespace=data)

    ## read configuration file
    config_file = FrameworkLibrary.ConfigParse(
        data.Config)  #config_file is a class that stores all parameters
    model_run = data.ModelRun

    #= set inital working directory to repository root folder
    os.chdir(config_file.repository_directory)

    ## ===== run operational framework

    # if Update Configuration File (specifically the hindcast and forecast dates)
    if model_run == "UpdateConfig":
        print "\n===============Updating Configuration File with Today's dates===================\n"
        FrameworkLibrary.UpdateConfig(config_file)

    #= spin up
    elif model_run == "Spinup":
        FrameworkLibrary.spin_up(config_file)

    # hindcast
    elif model_run == "DefaultHindcast":
        FrameworkLibrary.hindcast(config_file)

    # Forecast
    elif model_run == "Forecast":
        FrameworkLibrary.forecast(config_file)

    # Accept and Copy
    elif model_run == "AcceptAndCopy":
        FrameworkLibrary.AcceptAndCopy(config_file)

    else:
        print "\noptions selected are not correct. please review configuration settings.\n"
def Run_Framework():
    """Command-line entry point: parse -c/--Config and -m/--ModelRun options,
    load the configuration file, and dispatch the requested model run.

    NOTE(review): the --ModelRun help text lists 'HindcastAdjust' but no branch
    below handles it -- confirm whether that mode was removed or never wired up.
    """
    # get arguments: a bare namespace object receives the parsed options
    class args(object):
        pass

    data = args()
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--Config", help="Full path to the configuration file.")
    parser.add_argument("-m", "--ModelRun", help="Type of model run: Spinup,DefaultHindcast,HindcastAdjust,Forecast")
    parser.parse_args(namespace=data)

    ## read configuration file
    config_file = FrameworkLibrary.ConfigParse(data.Config)  # config_file is a class that stores all parameters
    model_run = data.ModelRun

    # = set initial working directory to repository root folder so relative paths resolve
    os.chdir(config_file.repository_directory)

    ## ===== run operational framework: dispatch on the requested model run

    # Update Configuration File (specifically the hindcast and forecast dates)
    if model_run == "UpdateConfig":
        print "\n===============Updating Configuration File with Today's dates===================\n"
        FrameworkLibrary.UpdateConfig(config_file)

    # = spin up
    elif model_run == "Spinup":
        FrameworkLibrary.spin_up(config_file)

    # hindcast
    elif model_run == "DefaultHindcast":
        FrameworkLibrary.hindcast(config_file)

    # Forecast
    elif model_run == "Forecast":
        FrameworkLibrary.forecast(config_file)

    # Accept and Copy
    elif model_run == "AcceptAndCopy":
        FrameworkLibrary.AcceptAndCopy(config_file)

    # unrecognised mode: report and fall through without running anything
    else:
        print "\noptions selected are not correct. please review configuration settings.\n"
Esempio n. 3
0
def Download_Datamart_GEMHindcast(config_file, type, RepoPath):
    """
    Downloads forecast data from the EC datamart, converts and appends it to existing data.
    This is meant to update hindcast meteorological data up to the current date before the hindcast is run.
    It is currently only used for temperature data. Currently the best way to get gridded temperature data
    with good temporal resolution is to use the prior days' forecast.
    
    Args:
        config_file: see class ConfigParse()
        type: string to determine type of ReAnalysis Data; currently CaPA is the only reanalysis that exists
        RepoPath: directory where the reanalysis hindcast r2c file is stored and the reanalysis grib files are stored
    Returns:
        NULL - but downloads, converts and appends reanalysis data to r2c files
    """
    
    #Initialize some useful variables
    timeVar = FrameworkLibrary.getDateTime(hours = 0) #get today at time = 0
    timestamp = timeVar.strftime("%Y%m%d%H")
    ScriptDir = config_file.scripts_directory


    #define server/model defaults
    if type == 'GEMTemps':
        url = 'http://dd.weather.gc.ca/model_gem_regional/10km/grib2/'
        filename_nomenclature = 'CMC_reg_TMP_TGL_2_ps10km_'
        forecast_periods = [00,06,12,18] #the forecast is produced 4 times a day
        time_periods = [000,003] # want to grab these hours to stitch together
    else:
        raise ValueError('Source type is not defined. Only "GEMTemps" forecast/hindcast data can currently be downloaded')
        
        
    #the model data is only stored online for today and yesterday
    #if this changes, then you will need to modify the dates
    now = datetime.datetime.now()
    yesterday = now - datetime.timedelta(days=1)

    now_datestamp = now.strftime("%Y%m%d")
    yesterday_datestamp = yesterday.strftime("%Y%m%d")

    dates = [yesterday_datestamp,now_datestamp]
    

    #Download grib2 files from DataMart ****************************************************** 
    #While an online version exists and a local version does not download then repeat (hours 000 & 003 for all four forecasts)
    for k,day in enumerate(dates):
        for i,startperiod in enumerate(forecast_periods):
            for j,starthour in enumerate(time_periods):
            
                filename = filename_nomenclature + day + str(startperiod).zfill(2) +'_P' + str(starthour).zfill(3) + '.grib2'  
                website = url + str(startperiod).zfill(2) + '/' + str(starthour).zfill(3) + '/' + filename
          
                if os.path.exists(os.path.join(RepoPath,filename)): #check if file already exists in local directory
                    lastfile = filename

                else:
                    try: #download if remote file exists
                        urllib2.urlopen(website) #command to see if remote file can be opened
                        os.system("wget -O " + os.path.join(RepoPath,filename) + " " + website) #use wget to actually download the file
                        lastfile = os.path.join(RepoPath,filename)
                    except urllib2.URLError as e: #do nothing if remote file doesn't exist
                        pass
            
            
    print "All of the files have been downloaded from:\n" + url

    
    #get the timestamp of the last file
    pattern = filename_nomenclature + "(\d+)_P(\d+).grib2"

    m = re.search(pattern,lastfile)
    if m:
        lasttimestring = m.groups()[0]
        forecast_hour = int(m.groups()[1])
        lasttimestep = datetime.datetime.strptime(lasttimestring,"%Y%m%d%H") + datetime.timedelta(hours=forecast_hour)

    #return values if the pattern was found, this will give an error if the pattern doesn't match
    #this error is intentional because the user must troubleshoot it if this occurs
    return lasttimestep, filename_nomenclature
Esempio n. 4
0
def Download_Datamart_ReAnalysisHindcast(config_file, type, RepoPath):
    """
    Downloads reanalysis data from the EC datamart, converts and appends it to existing data.
    This is meant to update hindcast meteorological data up to the current date before the hindcast is run.
    It is currently only used for CaPA data.
    
    Args:
        config_file: see class ConfigParse()
        type: string to determine type of ReAnalysis Data; currently CaPA is the only reanalysis that exists
        RepoPath: directory where the reanalysis hindcast r2c file is stored and the reanalysis grib files are stored
    Returns:
        NULL - but downloads, converts and appends reanalysis data to r2c files
    """
    
    #Initialize some useful variables
    timeVar = FrameworkLibrary.getDateTime(hours = 0) #get today at time = 0
    timestamp = timeVar.strftime("%Y%m%d%H")
    ScriptDir = config_file.scripts_directory


    #define server names
    if type == 'CaPA':
        url = 'http://dd.weather.gc.ca/analysis/precip/rdpa/grib2/polar_stereographic/06/'
        filename_nomenclature = 'CMC_RDPA_APCP-006-0700cutoff_SFC_0_ps10km_'
        
                
    else:
        raise ValueError('Source type is not defined. Only "CaPA" hindcast data can currently be downloaded')
    

    #get list of files on the server
    #http://stackoverflow.com/questions/10875215/python-urllib-downloading-contents-of-an-online-directory
    urlpath = urllib2.urlopen(url)
    server_data = urlpath.read()
    filename_pattern = re.compile('"(' + filename_nomenclature + '.+.grib2)"')
    filelist = filename_pattern.findall(server_data)
    
    
    print "Downloading grib files from DataMart..."
    #for all the files on the datamart
    for s,name in enumerate(filelist):
        try:
            if not os.path.exists(os.path.join(RepoPath, name)): #if file doesn't exist locally, then download
                os.system("wget -O " + os.path.join(RepoPath, name) + " " + url + "/" + name)
        except:
            pass
    print "All of the files have been downloaded from:\n" + url
    
    #get the timestamp of the last file
    pattern = filename_nomenclature + "(\d+)(_\d+.grib2)"

    m = re.search(pattern,filelist[-1]) #only look at the last file in the list
    if m:
        lasttimestring = m.groups()[0]
        lasttimestep = datetime.datetime.strptime(lasttimestring,"%Y%m%d%H")
        
        suffix = m.groups()[1]
        grib_path_string = os.path.join(RepoPath,filename_nomenclature + "%Y%m%d%H" + suffix)

    #return values if the pattern was found, this will give an error if the pattern doesn't match
    #this error is intentional because the user must troubleshoot it if this occurs
    return lasttimestep, grib_path_string
Esempio n. 5
0
def repo_pull_nomads(repos, filePath, timestamp, repo_path):
    """
    Downloads forecast data from NOMADS repository using wget
    
        Args:
        repos: the source data in a single source from the config file, see below for example
        filePath: the filepath where the scripts are run ex) Q:\WR_Ensemble_dev\A_MS\Repo\scripts
        timestamp: datestamp + start hour, this is currently the config.file date with a static start hour of '00'
        repo_path: path to store all the repo data; currently 'config_file.grib_forecast_repo'
        
            Website:
            http://nomads.ncep.noaa.gov/txt_descriptions/CMCENS_doc.shtml
            http://nomads.ncep.noaa.gov/cgi-bin/filter_cmcens.pl?file=cmc_gep00.t00z.pgrb2af384&lev_surface=on&var_APCP=on&var_TMP=on&subregion=&leftlon=-98&rightlon=-88&toplat=54&bottomlat=46&dir=%2Fcmce.20160830%2F00%2Fpgrb2a
            http://nomads.ncep.noaa.gov/cgi-bin/filter_gens.pl?file=gec00.t00z.pgrb2anl&lev_2_m_above_ground=on&lev_surface=on&var_TMP=on&subregion=&leftlon=-98&rightlon=-88&toplat=54&bottomlat=46&dir=%2Fgefs.20160830%2F00%2Fpgrb2
            
            #Example repos from config file, note substitution parameters (%X) in :FileName
           :SourceData  
           0:URL                http://nomads.ncep.noaa.gov/cgi-bin/              
           1:FileName           filter_%S1.pl?file=%S2gep%E.t%Hz.pgrb2af%T&%query&subregion=&leftlon=-98&rightlon=-88&toplat=54&bottomlat=46&dir=%2F%S3.%Y%m%d%2F00%2Fpgrb2a
           2:DeltaTimeStart     6                                                                          
           3:DeltaTimeEnd       240                                                                         
           4:DeltaTimeStep      6                                                                          
           5:StitchTimeStart    6                                                                          
           6:StitchTimeEnd      240                                                                         
           7:Grouping           tem                                                                        
           8:Type               NOMAD_GFS                                                                        
           9:Forecast           3
           10:num_ensembles     20   
           :EndSourceData

    Returns:
        NULL - downloads grib files from online repository
    """
    
    #build repository directory to store the date's files
    today_repo_path = repo_path + "/" + timestamp + "/"
    FrameworkLibrary.build_dir(today_repo_path)

    #get arguments from repos, we assume that 2 NOMADS sources won't be stitched together 
    #(as we do with the datamart GEM regional and global model), hence only grab the first string (ie. repos[*][0])
    url = repos[0][0]
    DeltaTimeStart = int(repos[2][0])
    DeltaTimeEnd = int(repos[3][0])
    DeltaTimeStep = int(repos[4][0])
    Source =  repos[8][0]
    Grouping = repos[7][0]
    num_ensembles = int(repos[10][0])
    wget_list = []

    print 'building list of files for download'
    for k in range(1,num_ensembles + 1): #for each ensemble member
        #set progress bar
        pbar = k/float(num_ensembles) * 40
        sys.stdout.write('\r')
        # the exact output you're looking for:
        sys.stdout.write("[%-40s] %d%%" % ('='*int(pbar), pbar/40*100))
        sys.stdout.flush()

        for j in range(DeltaTimeStart/DeltaTimeStep,DeltaTimeEnd/DeltaTimeStep + 1): #for each timestep

            ensemble = str(k).zfill(2) #save ensemble number in 2 digit format
          
            #Set timestep and replace in file name
            DeltaTime = j * DeltaTimeStep
            name = repos[1][0].replace('%T', str(DeltaTime).zfill(2))
            
            #replace the ensemble number in file name
            name = name.replace('%E',ensemble)
            
            #replace the data request in file name
            if Grouping == 'met':
                name = name.replace('%query', 'lev_surface=on&var_APCP=on')
                
            if Grouping == 'tem':
                name = name.replace('%query', 'lev_2_m_above_ground=on&var_TMP')   

            #replace the source in the file name (ie. CMC NAEFS, or GFS NAEFS)
            if Source == 'NOMAD_GFS':
                name = name.replace('%S1', 'gens')
                name = name.replace('%S2', '')
                name = name.replace('%S3', 'gefs')
                
            if Source == 'NOMAD_CMC':
                name = name.replace('%S1', 'cmcens')
                name = name.replace('%S2', 'cmc_')
                name = name.replace('%S3', 'cmce')
                
            #concatenate and create wget command
            downloadname = url + name
            filename = Source + '_' + Grouping + '_' + ensemble + '_' +  str(DeltaTime).zfill(3) + '_' + timestamp + '.grib2'
            cmd = "wget -q -O " + today_repo_path + filename + " " + '"' + downloadname + '"' + " 2> NUL"
            
            #append to wget download list if file doesn't exist locally
            if not os.path.isfile(today_repo_path + filename): #if file does not exist locally
                  wget_list.append(cmd)
                  
    #now run wget with multiple threads, this speeds up download time considerably
    print '\nDownloading Files... \n'
    pool = multiprocessing.Pool(processes = 20)
    pool.map(os.system,wget_list)
Esempio n. 6
0
def repo_pull_datamart(repos,filePath,timestamp,repo_path):
    """
    Downloads forecast data from the EC datamart repository (http://dd.weather.gc.ca/)
    using wget, one stitched 'series' at a time.

    Args:
        repos: the source data in a single source from the config file; row layout:
               [0] URL, [1] FileName, [2] DeltaTimeStart, [3] DeltaTimeEnd,
               [4] DeltaTimeStep, [5] StitchTimeStart, [6] StitchTimeEnd,
               [7] Grouping, [8] Type, [9] Forecast
               (URL and FileName may contain %T / %H / %Y%m%d%H substitution parameters)
        filePath: the filepath where the scripts are run
        timestamp: datestamp + start hour (currently the config-file date with a static start hour of '00')
        repo_path: directory that stores all the repo data; currently 'config_file.grib_forecast_repo'

    Returns:
        NULL - downloads grib files into repo_path/timestamp/
    """

    #make sure the directory holding this date's files exists
    today_repo_path = repo_path + "/" + timestamp + "/"
    FrameworkLibrary.build_dir(today_repo_path)

    #each 'series' in the source gets stitched together (typically 1 or 2 of them)
    for i, url in enumerate(repos[0]):
        DeltaTimeStart = int(repos[2][i])
        DeltaTimeEnd = int(repos[3][i])
        DeltaTimeStep = int(repos[4][i])

        #first/last timestep indices for this series (Python 2 integer division)
        first_step = DeltaTimeStart/DeltaTimeStep
        last_step = DeltaTimeEnd/DeltaTimeStep + 1

        #walk the series one timestep at a time
        for j in range(first_step, last_step):
            #draw the 40-character progress bar
            pbar = (j+1-first_step)/float(last_step-first_step) * 40
            sys.stdout.write('\r')
            sys.stdout.write("[%-40s] %d%%" % ('='*int(pbar), pbar/40*100))
            sys.stdout.flush()

            DeltaTime = j * DeltaTimeStep
            #substitute the zero-padded timestep into the URL and file name
            series_url = repos[0][i].replace('%T', str(DeltaTime).zfill(3))
            series_name = repos[1][i].replace('%T', str(DeltaTime).zfill(3))

            remote_file = series_url + series_name

            #fetch only the files we don't already have locally
            if not os.path.isfile(today_repo_path + series_name):
                try: #download if the remote file exists
                    urllib2.urlopen(remote_file) #probe whether the remote file can be opened
                    os.system("wget -q -O " + today_repo_path + series_name + " " + remote_file + " 2> NUL")
                except urllib2.URLError as e: #remote file is missing; report and carry on
                    print " Error: File does not exist locally or remotely"

        print "\n"
Esempio n. 7
0
def Download_Datamart_GEMHindcast(config_file, type, RepoPath):
    """
    Downloads forecast data from the EC datamart, converts and appends it to existing data.
    This is meant to update hindcast meteorological data up to the current date before the hindcast is run.
    It is currently only used for temperature data. Currently the best way to get gridded temperature data
    with good temporal resolution is to use the prior days' forecast.
    
    Args:
        config_file: see class ConfigParse()
        type: string to determine type of ReAnalysis Data; currently CaPA is the only reanalysis that exists
        RepoPath: directory where the reanalysis hindcast r2c file is stored and the reanalysis grib files are stored
    Returns:
        NULL - but downloads, converts and appends reanalysis data to r2c files
    """
    
    #Initialize some useful variables
    timeVar = FrameworkLibrary.getDateTime(hours = 0) #get today at time = 0
    timestamp = timeVar.strftime("%Y%m%d%H")
    ScriptDir = config_file.scripts_directory


    #define server/model defaults
    if type == 'GEMTemps':
        url = 'http://dd.weather.gc.ca/model_gem_regional/10km/grib2/'
        filename_nomenclature = 'CMC_reg_TMP_TGL_2_ps10km_'
        forecast_periods = [00,06,12,18] #the forecast is produced 4 times a day
        time_periods = [000,003] # want to grab these hours to stitch together
    else:
        raise ValueError('Source type is not defined. Only "GEMTemps" forecast/hindcast data can currently be downloaded')
        
        
    #the model data is only stored online for today and yesterday
    #if this changes, then you will need to modify the dates
    now = datetime.datetime.now()
    yesterday = now - datetime.timedelta(days=1)

    now_datestamp = now.strftime("%Y%m%d")
    yesterday_datestamp = yesterday.strftime("%Y%m%d")

    dates = [yesterday_datestamp,now_datestamp]
    

    #Download grib2 files from DataMart ****************************************************** 
    #While an online version exists and a local version does not download then repeat (hours 000 & 003 for all four forecasts)
    for k,day in enumerate(dates):
        for i,startperiod in enumerate(forecast_periods):
            for j,starthour in enumerate(time_periods):
            
                filename = filename_nomenclature + day + str(startperiod).zfill(2) +'_P' + str(starthour).zfill(3) + '.grib2'  
                website = url + str(startperiod).zfill(2) + '/' + str(starthour).zfill(3) + '/' + filename
          
                if os.path.exists(os.path.join(RepoPath,filename)): #check if file already exists in local directory
                    lastfile = filename

                else:
                    try: #download if remote file exists
                        urllib2.urlopen(website) #command to see if remote file can be opened
                        os.system("wget -O " + os.path.join(RepoPath,filename) + " " + website) #use wget to actually download the file
                        lastfile = os.path.join(RepoPath,filename)
                    except urllib2.URLError as e: #do nothing if remote file doesn't exist
                        pass
            
            
    print "All of the files have been downloaded from:\n" + url

    
    #get the timestamp of the last file
    pattern = filename_nomenclature + "(\d+)_P(\d+).grib2"

    m = re.search(pattern,lastfile)
    if m:
        lasttimestring = m.groups()[0]
        forecast_hour = int(m.groups()[1])
        lasttimestep = datetime.datetime.strptime(lasttimestring,"%Y%m%d%H") + datetime.timedelta(hours=forecast_hour)

    #return values if the pattern was found, this will give an error if the pattern doesn't match
    #this error is intentional because the user must troubleshoot it if this occurs
    return lasttimestep, filename_nomenclature
Esempio n. 8
0
def Download_Datamart_ReAnalysisHindcast(config_file, type, RepoPath):
    """
    Downloads reanalysis data from the EC datamart, converts and appends it to existing data.
    This is meant to update hindcast meteorological data up to the current date before the hindcast is run.
    It is currently only used for CaPA data.
    
    Args:
        config_file: see class ConfigParse()
        type: string to determine type of ReAnalysis Data; currently CaPA is the only reanalysis that exists
        RepoPath: directory where the reanalysis hindcast r2c file is stored and the reanalysis grib files are stored
    Returns:
        NULL - but downloads, converts and appends reanalysis data to r2c files
    """
    
    #Initialize some useful variables
    timeVar = FrameworkLibrary.getDateTime(hours = 0) #get today at time = 0
    timestamp = timeVar.strftime("%Y%m%d%H")
    ScriptDir = config_file.scripts_directory


    #define server names
    if type == 'CaPA':
        url = 'http://dd.weather.gc.ca/analysis/precip/rdpa/grib2/polar_stereographic/06/'
        filename_nomenclature = 'CMC_RDPA_APCP-006-0700cutoff_SFC_0_ps10km_'
        
                
    else:
        raise ValueError('Source type is not defined. Only "CaPA" hindcast data can currently be downloaded')
    

    #get list of files on the server
    #http://stackoverflow.com/questions/10875215/python-urllib-downloading-contents-of-an-online-directory
    urlpath = urllib2.urlopen(url)
    server_data = urlpath.read()
    filename_pattern = re.compile('"(' + filename_nomenclature + '.+.grib2)"')
    filelist = filename_pattern.findall(server_data)
    
    
    print "Downloading grib files from DataMart..."
    #for all the files on the datamart
    for s,name in enumerate(filelist):
        try:
            if not os.path.exists(os.path.join(RepoPath, name)): #if file doesn't exist locally, then download
                os.system("wget -O " + os.path.join(RepoPath, name) + " " + url + "/" + name)
        except:
            pass
    print "All of the files have been downloaded from:\n" + url
    
    #get the timestamp of the last file
    pattern = filename_nomenclature + "(\d+)(_\d+.grib2)"

    m = re.search(pattern,filelist[-1]) #only look at the last file in the list
    if m:
        lasttimestring = m.groups()[0]
        lasttimestep = datetime.datetime.strptime(lasttimestring,"%Y%m%d%H")
        
        suffix = m.groups()[1]
        grib_path_string = os.path.join(RepoPath,filename_nomenclature + "%Y%m%d%H" + suffix)

    #return values if the pattern was found, this will give an error if the pattern doesn't match
    #this error is intentional because the user must troubleshoot it if this occurs
    return lasttimestep, grib_path_string
Esempio n. 9
0
def repo_pull_nomads(repos, filePath, timestamp, repo_path):
    """
    Downloads forecast data from NOMADS repository using wget
    
        Args:
        repos: the source data in a single source from the config file, see below for example
        filePath: the filepath where the scripts are run ex) Q:\WR_Ensemble_dev\A_MS\Repo\scripts
        timestamp: datestamp + start hour, this is currently the config.file date with a static start hour of '00'
        repo_path: path to store all the repo data; currently 'config_file.grib_forecast_repo'
        
            Website:
            http://nomads.ncep.noaa.gov/txt_descriptions/CMCENS_doc.shtml
            http://nomads.ncep.noaa.gov/cgi-bin/filter_cmcens.pl?file=cmc_gep00.t00z.pgrb2af384&lev_surface=on&var_APCP=on&var_TMP=on&subregion=&leftlon=-98&rightlon=-88&toplat=54&bottomlat=46&dir=%2Fcmce.20160830%2F00%2Fpgrb2a
            http://nomads.ncep.noaa.gov/cgi-bin/filter_gens.pl?file=gec00.t00z.pgrb2anl&lev_2_m_above_ground=on&lev_surface=on&var_TMP=on&subregion=&leftlon=-98&rightlon=-88&toplat=54&bottomlat=46&dir=%2Fgefs.20160830%2F00%2Fpgrb2
            
            #Example repos from config file, note substitution parameters (%X) in :FileName
           :SourceData  
           0:URL                http://nomads.ncep.noaa.gov/cgi-bin/              
           1:FileName           filter_%S1.pl?file=%S2gep%E.t%Hz.pgrb2af%T&%query&subregion=&leftlon=-98&rightlon=-88&toplat=54&bottomlat=46&dir=%2F%S3.%Y%m%d%2F00%2Fpgrb2a
           2:DeltaTimeStart     6                                                                          
           3:DeltaTimeEnd       240                                                                         
           4:DeltaTimeStep      6                                                                          
           5:StitchTimeStart    6                                                                          
           6:StitchTimeEnd      240                                                                         
           7:Grouping           tem                                                                        
           8:Type               NOMAD_GFS                                                                        
           9:Forecast           3
           10:num_ensembles     20   
           :EndSourceData

    Returns:
        NULL - downloads grib files from online repository
    """
    
    #build repository directory to store the date's files
    today_repo_path = repo_path + "/" + timestamp + "/"
    FrameworkLibrary.build_dir(today_repo_path)

    #get arguments from repos, we assume that 2 NOMADS sources won't be stitched together 
    #(as we do with the datamart GEM regional and global model), hence only grab the first string (ie. repos[*][0])
    url = repos[0][0]
    DeltaTimeStart = int(repos[2][0])
    DeltaTimeEnd = int(repos[3][0])
    DeltaTimeStep = int(repos[4][0])
    Source =  repos[8][0]
    Grouping = repos[7][0]
    num_ensembles = int(repos[10][0])
    wget_list = []

    print 'building list of files for download'
    for k in range(1,num_ensembles + 1): #for each ensemble member
        #set progress bar
        pbar = k/float(num_ensembles) * 40
        sys.stdout.write('\r')
        # the exact output you're looking for:
        sys.stdout.write("[%-40s] %d%%" % ('='*int(pbar), pbar/40*100))
        sys.stdout.flush()

        for j in range(DeltaTimeStart/DeltaTimeStep,DeltaTimeEnd/DeltaTimeStep + 1): #for each timestep

            ensemble = str(k).zfill(2) #save ensemble number in 2 digit format
          
            #Set timestep and replace in file name
            DeltaTime = j * DeltaTimeStep
            name = repos[1][0].replace('%T', str(DeltaTime).zfill(2))
            
            #replace the ensemble number in file name
            name = name.replace('%E',ensemble)
            
            #replace the data request in file name
            if Grouping == 'met':
                name = name.replace('%query', 'lev_surface=on&var_APCP=on')
                
            if Grouping == 'tem':
                name = name.replace('%query', 'lev_2_m_above_ground=on&var_TMP')   

            #replace the source in the file name (ie. CMC NAEFS, or GFS NAEFS)
            if Source == 'NOMAD_GFS':
                name = name.replace('%S1', 'gens')
                name = name.replace('%S2', '')
                name = name.replace('%S3', 'gefs')
                
            if Source == 'NOMAD_CMC':
                name = name.replace('%S1', 'cmcens')
                name = name.replace('%S2', 'cmc_')
                name = name.replace('%S3', 'cmce')
                
            #concatenate and create wget command
            downloadname = url + name
            filename = Source + '_' + Grouping + '_' + ensemble + '_' +  str(DeltaTime).zfill(3) + '_' + timestamp + '.grib2'
            cmd = "wget -q -O " + today_repo_path + filename + " " + '"' + downloadname + '"' + " 2> NUL"
            
            #append to wget download list if file doesn't exist locally
            if not os.path.isfile(today_repo_path + filename): #if file does not exist locally
                  wget_list.append(cmd)
                  
    #now run wget with multiple threads, this speeds up download time considerably
    print '\nDownloading Files... \n'
    pool = multiprocessing.Pool(processes = 20)
    pool.map(os.system,wget_list)
Esempio n. 10
0
def repo_pull_datamart(repos,filePath,timestamp,repo_path):
    """
    Downloads forecast data from the EC datamart repository (http://dd.weather.gc.ca/)
    using wget, one stitched 'series' at a time.

    Args:
        repos: the source data in a single source from the config file; row layout:
               [0] URL, [1] FileName, [2] DeltaTimeStart, [3] DeltaTimeEnd,
               [4] DeltaTimeStep, [5] StitchTimeStart, [6] StitchTimeEnd,
               [7] Grouping, [8] Type, [9] Forecast
               (URL and FileName may contain %T / %H / %Y%m%d%H substitution parameters)
        filePath: the filepath where the scripts are run
        timestamp: datestamp + start hour (currently the config-file date with a static start hour of '00')
        repo_path: directory that stores all the repo data; currently 'config_file.grib_forecast_repo'

    Returns:
        NULL - downloads grib files into repo_path/timestamp/
    """

    #make sure the directory holding this date's files exists
    today_repo_path = repo_path + "/" + timestamp + "/"
    FrameworkLibrary.build_dir(today_repo_path)

    #each 'series' in the source gets stitched together (typically 1 or 2 of them)
    for i, url in enumerate(repos[0]):
        DeltaTimeStart = int(repos[2][i])
        DeltaTimeEnd = int(repos[3][i])
        DeltaTimeStep = int(repos[4][i])

        #first/last timestep indices for this series (Python 2 integer division)
        first_step = DeltaTimeStart/DeltaTimeStep
        last_step = DeltaTimeEnd/DeltaTimeStep + 1

        #walk the series one timestep at a time
        for j in range(first_step, last_step):
            #draw the 40-character progress bar
            pbar = (j+1-first_step)/float(last_step-first_step) * 40
            sys.stdout.write('\r')
            sys.stdout.write("[%-40s] %d%%" % ('='*int(pbar), pbar/40*100))
            sys.stdout.flush()

            DeltaTime = j * DeltaTimeStep
            #substitute the zero-padded timestep into the URL and file name
            series_url = repos[0][i].replace('%T', str(DeltaTime).zfill(3))
            series_name = repos[1][i].replace('%T', str(DeltaTime).zfill(3))

            remote_file = series_url + series_name

            #fetch only the files we don't already have locally
            if not os.path.isfile(today_repo_path + series_name):
                try: #download if the remote file exists
                    urllib2.urlopen(remote_file) #probe whether the remote file can be opened
                    os.system("wget -q -O " + today_repo_path + series_name + " " + remote_file + " 2> NUL")
                except urllib2.URLError as e: #remote file is missing; report and carry on
                    print " Error: File does not exist locally or remotely"

        print "\n"