Example #1
0
def repo_pull_nomads(repos, filePath, timestamp, repo_path):
    r"""
    Downloads forecast data from NOMADS repository using wget

    One wget command is built for every ensemble member / forecast timestep
    combination whose target file is not already on disk; the commands are
    then run in parallel through a multiprocessing pool. A target file left
    behind by a wget call that reported failure is removed again so that the
    "already exists locally" check does not block a retry on the next run.

        Args:
        repos: the source data in a single source from the config file, see below for example
        filePath: the filepath where the scripts are run ex) Q:\WR_Ensemble_dev\A_MS\Repo\scripts
                  (not used by this function; kept so the signature matches the other repo_pull_* functions)
        timestamp: datestamp + start hour, this is currently the config.file date with a static start hour of '00'
        repo_path: path to store all the repo data; currently 'config_file.grib_forecast_repo'

            Website:
            http://nomads.ncep.noaa.gov/txt_descriptions/CMCENS_doc.shtml
            http://nomads.ncep.noaa.gov/cgi-bin/filter_cmcens.pl?file=cmc_gep00.t00z.pgrb2af384&lev_surface=on&var_APCP=on&var_TMP=on&subregion=&leftlon=-98&rightlon=-88&toplat=54&bottomlat=46&dir=%2Fcmce.20160830%2F00%2Fpgrb2a
            http://nomads.ncep.noaa.gov/cgi-bin/filter_gens.pl?file=gec00.t00z.pgrb2anl&lev_2_m_above_ground=on&lev_surface=on&var_TMP=on&subregion=&leftlon=-98&rightlon=-88&toplat=54&bottomlat=46&dir=%2Fgefs.20160830%2F00%2Fpgrb2

            #Example repos from config file, note substitution parameters (%X) in :FileName
           :SourceData
           0:URL                http://nomads.ncep.noaa.gov/cgi-bin/
           1:FileName           filter_%S1.pl?file=%S2gep%E.t%Hz.pgrb2af%T&%query&subregion=&leftlon=-98&rightlon=-88&toplat=54&bottomlat=46&dir=%2F%S3.%Y%m%d%2F00%2Fpgrb2a
           2:DeltaTimeStart     6
           3:DeltaTimeEnd       240
           4:DeltaTimeStep      6
           5:StitchTimeStart    6
           6:StitchTimeEnd      240
           7:Grouping           tem
           8:Type               NOMAD_GFS
           9:Forecast           3
           10:num_ensembles     20
           :EndSourceData

        Only %T, %E, %query and %S1/%S2/%S3 are substituted here; any other
        placeholder (e.g. %H, %Y%m%d) is left as-is - presumably already
        resolved by the caller, TODO confirm.

    Returns:
        NULL - downloads grib files from online repository
    """

    #build repository directory to store the date's files
    today_repo_path = repo_path + "/" + timestamp + "/"
    FrameworkLibrary.build_dir(today_repo_path)

    #get arguments from repos, we assume that 2 NOMADS sources won't be stitched together
    #(as we do with the datamart GEM regional and global model), hence only grab the first string (ie. repos[*][0])
    url = repos[0][0]
    name_template = repos[1][0]
    DeltaTimeStart = int(repos[2][0])
    DeltaTimeEnd = int(repos[3][0])
    DeltaTimeStep = int(repos[4][0])
    Grouping = repos[7][0]
    Source = repos[8][0]
    num_ensembles = int(repos[10][0])

    #data request per grouping; every NOMADS filter switch is passed as '<name>=on'
    #(see the example URLs above), so var_TMP needs '=on' just like var_APCP
    query_by_grouping = {
        'met': 'lev_surface=on&var_APCP=on',
        'tem': 'lev_2_m_above_ground=on&var_TMP=on',
    }
    #source specific pieces of the file name (ie. CMC NAEFS, or GFS NAEFS)
    tokens_by_source = {
        'NOMAD_GFS': (('%S1', 'gens'), ('%S2', ''), ('%S3', 'gefs')),
        'NOMAD_CMC': (('%S1', 'cmcens'), ('%S2', 'cmc_'), ('%S3', 'cmce')),
    }
    #substitutions that are the same for every file: pick them once, apply them per file
    #unknown groupings/sources get no substitution, exactly as before
    static_subs = []
    if Grouping in query_by_grouping:
        static_subs.append(('%query', query_by_grouping[Grouping]))
    static_subs.extend(tokens_by_source.get(Source, ()))

    #'//' keeps the integer step indices on both Python 2 and Python 3
    first_step = DeltaTimeStart // DeltaTimeStep
    last_step = DeltaTimeEnd // DeltaTimeStep

    wget_list = []   #shell commands to run
    targets = []     #local file written by the command at the same index

    print('building list of files for download')
    for k in range(1, num_ensembles + 1): #for each ensemble member
        #set progress bar
        pbar = k/float(num_ensembles) * 40
        sys.stdout.write('\r')
        sys.stdout.write("[%-40s] %d%%" % ('='*int(pbar), pbar/40*100))
        sys.stdout.flush()

        ensemble = str(k).zfill(2) #save ensemble number in 2 digit format

        for j in range(first_step, last_step + 1): #for each timestep
            #set timestep and ensemble number, then the static pieces
            DeltaTime = j * DeltaTimeStep
            name = name_template.replace('%T', str(DeltaTime).zfill(2))
            name = name.replace('%E', ensemble)
            for token, value in static_subs:
                name = name.replace(token, value)

            downloadname = url + name
            filename = Source + '_' + Grouping + '_' + ensemble + '_' + str(DeltaTime).zfill(3) + '_' + timestamp + '.grib2'
            target = today_repo_path + filename

            #only download files that don't exist locally
            if os.path.isfile(target):
                continue

            #quote both the path and the URL: the path may contain spaces and the URL contains '&'
            #os.devnull is 'nul' on Windows and '/dev/null' elsewhere
            wget_list.append('wget -q -O "' + target + '" "' + downloadname + '" 2> ' + os.devnull)
            targets.append(target)

    print('\nDownloading Files... \n')
    if not wget_list:
        return

    #run wget in several worker processes at once, this speeds up download time considerably
    pool = multiprocessing.Pool(processes=min(20, len(wget_list)))
    try:
        statuses = pool.map(os.system, wget_list)
    finally:
        #always release the worker processes
        pool.close()
        pool.join()

    #'wget -O' creates the output file even when the request fails; remove those leftovers,
    #otherwise the isfile() check above would skip them forever
    for status, target in zip(statuses, targets):
        if status != 0 and os.path.isfile(target):
            try:
                os.remove(target)
            except OSError:
                print(" Warning: could not remove incomplete download " + target)
Example #2
0
def repo_pull_nomads(repos, filePath, timestamp, repo_path):
    r"""
    Downloads forecast data from NOMADS repository using wget

    One wget command is built for every ensemble member / forecast timestep
    combination whose target file is not already on disk; the commands are
    then run in parallel through a multiprocessing pool. A target file left
    behind by a wget call that reported failure is removed again so that the
    "already exists locally" check does not block a retry on the next run.

        Args:
        repos: the source data in a single source from the config file, see below for example
        filePath: the filepath where the scripts are run ex) Q:\WR_Ensemble_dev\A_MS\Repo\scripts
                  (not used by this function; kept so the signature matches the other repo_pull_* functions)
        timestamp: datestamp + start hour, this is currently the config.file date with a static start hour of '00'
        repo_path: path to store all the repo data; currently 'config_file.grib_forecast_repo'

            Website:
            http://nomads.ncep.noaa.gov/txt_descriptions/CMCENS_doc.shtml
            http://nomads.ncep.noaa.gov/cgi-bin/filter_cmcens.pl?file=cmc_gep00.t00z.pgrb2af384&lev_surface=on&var_APCP=on&var_TMP=on&subregion=&leftlon=-98&rightlon=-88&toplat=54&bottomlat=46&dir=%2Fcmce.20160830%2F00%2Fpgrb2a
            http://nomads.ncep.noaa.gov/cgi-bin/filter_gens.pl?file=gec00.t00z.pgrb2anl&lev_2_m_above_ground=on&lev_surface=on&var_TMP=on&subregion=&leftlon=-98&rightlon=-88&toplat=54&bottomlat=46&dir=%2Fgefs.20160830%2F00%2Fpgrb2

            #Example repos from config file, note substitution parameters (%X) in :FileName
           :SourceData
           0:URL                http://nomads.ncep.noaa.gov/cgi-bin/
           1:FileName           filter_%S1.pl?file=%S2gep%E.t%Hz.pgrb2af%T&%query&subregion=&leftlon=-98&rightlon=-88&toplat=54&bottomlat=46&dir=%2F%S3.%Y%m%d%2F00%2Fpgrb2a
           2:DeltaTimeStart     6
           3:DeltaTimeEnd       240
           4:DeltaTimeStep      6
           5:StitchTimeStart    6
           6:StitchTimeEnd      240
           7:Grouping           tem
           8:Type               NOMAD_GFS
           9:Forecast           3
           10:num_ensembles     20
           :EndSourceData

        Only %T, %E, %query and %S1/%S2/%S3 are substituted here; any other
        placeholder (e.g. %H, %Y%m%d) is left as-is - presumably already
        resolved by the caller, TODO confirm.

    Returns:
        NULL - downloads grib files from online repository
    """

    #build repository directory to store the date's files
    today_repo_path = repo_path + "/" + timestamp + "/"
    FrameworkLibrary.build_dir(today_repo_path)

    #get arguments from repos, we assume that 2 NOMADS sources won't be stitched together
    #(as we do with the datamart GEM regional and global model), hence only grab the first string (ie. repos[*][0])
    url = repos[0][0]
    name_template = repos[1][0]
    DeltaTimeStart = int(repos[2][0])
    DeltaTimeEnd = int(repos[3][0])
    DeltaTimeStep = int(repos[4][0])
    Grouping = repos[7][0]
    Source = repos[8][0]
    num_ensembles = int(repos[10][0])

    #data request per grouping; every NOMADS filter switch is passed as '<name>=on'
    #(see the example URLs above), so var_TMP needs '=on' just like var_APCP
    query_by_grouping = {
        'met': 'lev_surface=on&var_APCP=on',
        'tem': 'lev_2_m_above_ground=on&var_TMP=on',
    }
    #source specific pieces of the file name (ie. CMC NAEFS, or GFS NAEFS)
    tokens_by_source = {
        'NOMAD_GFS': (('%S1', 'gens'), ('%S2', ''), ('%S3', 'gefs')),
        'NOMAD_CMC': (('%S1', 'cmcens'), ('%S2', 'cmc_'), ('%S3', 'cmce')),
    }
    #substitutions that are the same for every file: pick them once, apply them per file
    #unknown groupings/sources get no substitution, exactly as before
    static_subs = []
    if Grouping in query_by_grouping:
        static_subs.append(('%query', query_by_grouping[Grouping]))
    static_subs.extend(tokens_by_source.get(Source, ()))

    #'//' keeps the integer step indices on both Python 2 and Python 3
    first_step = DeltaTimeStart // DeltaTimeStep
    last_step = DeltaTimeEnd // DeltaTimeStep

    wget_list = []   #shell commands to run
    targets = []     #local file written by the command at the same index

    print('building list of files for download')
    for k in range(1, num_ensembles + 1): #for each ensemble member
        #set progress bar
        pbar = k/float(num_ensembles) * 40
        sys.stdout.write('\r')
        sys.stdout.write("[%-40s] %d%%" % ('='*int(pbar), pbar/40*100))
        sys.stdout.flush()

        ensemble = str(k).zfill(2) #save ensemble number in 2 digit format

        for j in range(first_step, last_step + 1): #for each timestep
            #set timestep and ensemble number, then the static pieces
            DeltaTime = j * DeltaTimeStep
            name = name_template.replace('%T', str(DeltaTime).zfill(2))
            name = name.replace('%E', ensemble)
            for token, value in static_subs:
                name = name.replace(token, value)

            downloadname = url + name
            filename = Source + '_' + Grouping + '_' + ensemble + '_' + str(DeltaTime).zfill(3) + '_' + timestamp + '.grib2'
            target = today_repo_path + filename

            #only download files that don't exist locally
            if os.path.isfile(target):
                continue

            #quote both the path and the URL: the path may contain spaces and the URL contains '&'
            #os.devnull is 'nul' on Windows and '/dev/null' elsewhere
            wget_list.append('wget -q -O "' + target + '" "' + downloadname + '" 2> ' + os.devnull)
            targets.append(target)

    print('\nDownloading Files... \n')
    if not wget_list:
        return

    #run wget in several worker processes at once, this speeds up download time considerably
    pool = multiprocessing.Pool(processes=min(20, len(wget_list)))
    try:
        statuses = pool.map(os.system, wget_list)
    finally:
        #always release the worker processes
        pool.close()
        pool.join()

    #'wget -O' creates the output file even when the request fails; remove those leftovers,
    #otherwise the isfile() check above would skip them forever
    for status, target in zip(statuses, targets):
        if status != 0 and os.path.isfile(target):
            try:
                os.remove(target)
            except OSError:
                print(" Warning: could not remove incomplete download " + target)
Example #3
0
def repo_pull_datamart(repos,filePath,timestamp,repo_path):
    r"""
    Downloads forecast data from EC datamart repository using wget
    http://dd.weather.gc.ca/

    For every series in repos and every timestep of that series, the remote
    file is first probed with urllib2; if it can be opened and the file is
    not already on disk, it is downloaded with wget. A file left behind by a
    wget call that reported failure is removed again so that the "already
    exists locally" check does not block a retry on the next run.

    Args:
        repos: the source data in a single source from the config file, see below for example
        filePath: the filepath where the scripts are run ex) Q:\WR_Ensemble_dev\A_MS\Repo\scripts
                  (not used by this function; kept so the signature matches the other repo_pull_* functions)
        timestamp: datestamp + start hour, this is currently the config.file date with a static start hour of '00'
        repo_path: path to store all the repo data; currently 'config_file.grib_forecast_repo'

        [0]   :URL                http://dd.weather.gc.ca/model_gem_regional/10km/grib2/%H/%T/                http://dd.weather.gc.ca/model_gem_global/25km/grib2/lat_lon/%H/%T/
        [1]   :FileName           CMC_reg_APCP_SFC_0_ps10km_%Y%m%d%H_P%T.grib2                                CMC_glb_APCP_SFC_0_latlon.24x.24_%Y%m%d%H_P%T.grib2
        [2]   :DeltaTimeStart     3                                                                           3
        [3]   :DeltaTimeEnd       48                                                                          240
        [4]   :DeltaTimeStep      3                                                                           3
        [5]   :StitchTimeStart    3                                                                           48
        [6]   :StitchTimeEnd      48                                                                          240
        [7]   :Grouping           met                                                                         met
        [8]   :Type               GEM                                                                         GEM
        [9]   :Forecast           1                                                                           1

        Only %T is substituted here; any other placeholder (e.g. %H, %Y%m%d)
        is left as-is - presumably already resolved by the caller, TODO confirm.

    Returns:
        NULL - downloads grib files from online repository
    """

    #build repository directory to store the date's files
    today_repo_path = repo_path + "/" + timestamp + "/"
    FrameworkLibrary.build_dir(today_repo_path)

    #for each of the 'series' that are being stitched together (typically 1 or 2)
    for i, url_template in enumerate(repos[0]):
        name_template = repos[1][i]
        DeltaTimeStart = int(repos[2][i])
        DeltaTimeEnd = int(repos[3][i])
        DeltaTimeStep = int(repos[4][i])

        #'//' keeps the integer step indices on both Python 2 and Python 3
        first_step = DeltaTimeStart // DeltaTimeStep
        last_step = DeltaTimeEnd // DeltaTimeStep
        num_steps = last_step + 1 - first_step

        #loop through the time series
        for j in range(first_step, last_step + 1):
            #set progress bar
            pbar = (j + 1 - first_step)/float(num_steps) * 40
            sys.stdout.write('\r')
            sys.stdout.write("[%-40s] %d%%" % ('='*int(pbar), pbar/40*100))
            sys.stdout.flush()

            #replace %T with the deltaT (3 digit format) in both the directory and the file name
            DeltaTime = j * DeltaTimeStep
            hours = str(DeltaTime).zfill(3)
            name = name_template.replace('%T', hours)
            remote_file = url_template.replace('%T', hours) + name
            local_file = today_repo_path + name

            #nothing to do if file already exists locally
            if os.path.isfile(local_file):
                continue

            #probe the remote file first so wget is only started for files that exist
            try:
                response = urllib2.urlopen(remote_file)
            except urllib2.URLError:
                print(" Error: File does not exist locally or remotely")
                continue
            response.close() #only the probe result matters; don't leak the connection

            #use wget to actually download the file; quote path and URL so spaces or shell
            #metacharacters don't split the command; os.devnull is 'nul' on Windows
            status = os.system('wget -q -O "' + local_file + '" "' + remote_file + '" 2> ' + os.devnull)

            #'wget -O' creates the output file even when the download fails; remove the leftover,
            #otherwise the isfile() check above would skip it forever
            if status != 0 and os.path.isfile(local_file):
                try:
                    os.remove(local_file)
                except OSError:
                    print(" Warning: could not remove incomplete download " + local_file)

        print("\n")
Example #4
0
def repo_pull_datamart(repos,filePath,timestamp,repo_path):
    r"""
    Downloads forecast data from EC datamart repository using wget
    http://dd.weather.gc.ca/

    For every series in repos and every timestep of that series, the remote
    file is first probed with urllib2; if it can be opened and the file is
    not already on disk, it is downloaded with wget. A file left behind by a
    wget call that reported failure is removed again so that the "already
    exists locally" check does not block a retry on the next run.

    Args:
        repos: the source data in a single source from the config file, see below for example
        filePath: the filepath where the scripts are run ex) Q:\WR_Ensemble_dev\A_MS\Repo\scripts
                  (not used by this function; kept so the signature matches the other repo_pull_* functions)
        timestamp: datestamp + start hour, this is currently the config.file date with a static start hour of '00'
        repo_path: path to store all the repo data; currently 'config_file.grib_forecast_repo'

        [0]   :URL                http://dd.weather.gc.ca/model_gem_regional/10km/grib2/%H/%T/                http://dd.weather.gc.ca/model_gem_global/25km/grib2/lat_lon/%H/%T/
        [1]   :FileName           CMC_reg_APCP_SFC_0_ps10km_%Y%m%d%H_P%T.grib2                                CMC_glb_APCP_SFC_0_latlon.24x.24_%Y%m%d%H_P%T.grib2
        [2]   :DeltaTimeStart     3                                                                           3
        [3]   :DeltaTimeEnd       48                                                                          240
        [4]   :DeltaTimeStep      3                                                                           3
        [5]   :StitchTimeStart    3                                                                           48
        [6]   :StitchTimeEnd      48                                                                          240
        [7]   :Grouping           met                                                                         met
        [8]   :Type               GEM                                                                         GEM
        [9]   :Forecast           1                                                                           1

        Only %T is substituted here; any other placeholder (e.g. %H, %Y%m%d)
        is left as-is - presumably already resolved by the caller, TODO confirm.

    Returns:
        NULL - downloads grib files from online repository
    """

    #build repository directory to store the date's files
    today_repo_path = repo_path + "/" + timestamp + "/"
    FrameworkLibrary.build_dir(today_repo_path)

    #for each of the 'series' that are being stitched together (typically 1 or 2)
    for i, url_template in enumerate(repos[0]):
        name_template = repos[1][i]
        DeltaTimeStart = int(repos[2][i])
        DeltaTimeEnd = int(repos[3][i])
        DeltaTimeStep = int(repos[4][i])

        #'//' keeps the integer step indices on both Python 2 and Python 3
        first_step = DeltaTimeStart // DeltaTimeStep
        last_step = DeltaTimeEnd // DeltaTimeStep
        num_steps = last_step + 1 - first_step

        #loop through the time series
        for j in range(first_step, last_step + 1):
            #set progress bar
            pbar = (j + 1 - first_step)/float(num_steps) * 40
            sys.stdout.write('\r')
            sys.stdout.write("[%-40s] %d%%" % ('='*int(pbar), pbar/40*100))
            sys.stdout.flush()

            #replace %T with the deltaT (3 digit format) in both the directory and the file name
            DeltaTime = j * DeltaTimeStep
            hours = str(DeltaTime).zfill(3)
            name = name_template.replace('%T', hours)
            remote_file = url_template.replace('%T', hours) + name
            local_file = today_repo_path + name

            #nothing to do if file already exists locally
            if os.path.isfile(local_file):
                continue

            #probe the remote file first so wget is only started for files that exist
            try:
                response = urllib2.urlopen(remote_file)
            except urllib2.URLError:
                print(" Error: File does not exist locally or remotely")
                continue
            response.close() #only the probe result matters; don't leak the connection

            #use wget to actually download the file; quote path and URL so spaces or shell
            #metacharacters don't split the command; os.devnull is 'nul' on Windows
            status = os.system('wget -q -O "' + local_file + '" "' + remote_file + '" 2> ' + os.devnull)

            #'wget -O' creates the output file even when the download fails; remove the leftover,
            #otherwise the isfile() check above would skip it forever
            if status != 0 and os.path.isfile(local_file):
                try:
                    os.remove(local_file)
                except OSError:
                    print(" Warning: could not remove incomplete download " + local_file)

        print("\n")