def archive_aux_data(self):
    '''
    Description:
        Defines the main processing method for the class.
    '''

    # Figure out the names of the files to retrieve
    names = list(self.get_name_list())

    # Establish a logged in session
    session = Web.Session(
        block_size=Config.get('http_transfer_block_size'))

    # Log in
    session.login(Config.get('ucar.login_credentials.login_url'),
                  Config.get('ucar.login_credentials.login_data'))

    for name in names:
        filename = '{0}.tar'.format(name)
        self.logger.info('Retrieving {0}'.format(filename))

        # The year is embedded in the base filename
        year = name[7:11]
        url = Config.get('ucar.url_format').format(year, filename)
        session.http_transfer_file(url, filename)

        # Extract the tar'd data; the command output lists the
        # extracted grib files
        cmd = ' '.join(['tar', '-xvf', filename])
        grib_files = System.execute_cmd(cmd)
        if grib_files is not None and len(grib_files) > 0:
            self.logger.info(grib_files)

        # Guard against a failed extraction before splitting the output
        extracted = grib_files.split() if grib_files else []

        # For each parameter we need
        for variable in Config.get('narr_variables'):
            self.logger.info('Processing Variable [{0}]'.format(variable))
            for grib_file in extracted:
                self.process_grib_for_variable(variable, grib_file)

        # Cleanup - Extracted grib files
        for grib_file in extracted:
            if os.path.exists(grib_file):
                os.unlink(grib_file)

        # Cleanup - The tar ball
        if os.path.exists(filename):
            os.unlink(filename)
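# A hedged alternative for the extraction step above: instead of shelling
# out to `tar`, the standard-library tarfile module can extract the archive
# and report the member names directly.  This is a minimal sketch, not the
# project's method; it assumes only that `filename` is a plain
# (uncompressed) tar file in the working directory.
import tarfile

def extract_tar_members(filename):
    '''Extract a tar file and return the list of extracted member names'''
    with tarfile.open(filename, 'r') as tar:
        # getnames() gives the same member list that parsing the
        # `tar -xvf` output would provide
        members = tar.getnames()
        tar.extractall()
    return members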
def get_name_list(self):
    '''
    Description:
        Determines all of the base filenames to process based on the
        dates provided.

    Notes:
        Files typically contain 3 days.  Special assumptions are coded
        for the end of the month, which may have 1, 2, 3, or 4 days,
        depending on the month and year.
    '''

    name_format = Config.get('ucar.remote_name_format')

    days_3 = timedelta(days=3)

    c_date = self.s_date
    while c_date <= self.e_date:
        if c_date.day == 28:
            # The last file of the month covers day 28 through the
            # final day of the month
            days = calendar.monthrange(c_date.year, c_date.month)[1]
            yield name_format.format(c_date.year, c_date.month,
                                     c_date.day, days)
            # Advance to the first day of the next month
            c_date += timedelta(days=(days - 28 + 1))
        else:
            yield name_format.format(c_date.year, c_date.month,
                                     c_date.day, c_date.day + 2)
            c_date += days_3
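# Usage sketch for get_name_list().  The real remote name pattern comes
# from the 'ucar.remote_name_format' config entry; the format string below
# is a hypothetical stand-in with the same four fields (year, month, first
# day, last day), used only to show the shape of the yielded names.
from datetime import date, timedelta
import calendar

def name_list_sketch(s_date, e_date,
                     name_format='NARR3D_{0:04d}{1:02d}_{2:02d}{3:02d}'):
    '''Standalone mirror of get_name_list() with a hypothetical format'''
    c_date = s_date
    while c_date <= e_date:
        if c_date.day == 28:
            days = calendar.monthrange(c_date.year, c_date.month)[1]
            yield name_format.format(c_date.year, c_date.month,
                                     c_date.day, days)
            c_date += timedelta(days=(days - 28 + 1))
        else:
            yield name_format.format(c_date.year, c_date.month,
                                     c_date.day, c_date.day + 2)
            c_date += timedelta(days=3)

# list(name_list_sketch(date(2015, 1, 25), date(2015, 2, 1))) yields
# ['NARR3D_201501_2527', 'NARR3D_201501_2831', 'NARR3D_201502_0103']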
def main():
    '''
    Description:
        Provides the setup and execution of the processor for the
        application.
    '''

    # The config file is located in the same place as this script
    Config.read_config(os.path.dirname(__file__))

    # Setup the default logger format and level.  Log to STDOUT.
    logging.basicConfig(format=('%(asctime)s.%(msecs)03d %(process)d'
                                ' %(levelname)-8s'
                                ' %(filename)s:%(lineno)d:'
                                '%(funcName)s -- %(message)s'),
                        datefmt='%Y-%m-%d %H:%M:%S',
                        level=logging.INFO,
                        stream=sys.stdout)

    # Get the logger
    logger = logging.getLogger(__name__)

    # Turn down the requests and urllib3 logging, otherwise they fill the
    # log file with mostly useless information
    requests_logger = logging.getLogger('requests')
    requests_logger.setLevel(logging.WARNING)
    urllib3_logger = logging.getLogger('urllib3')
    urllib3_logger.setLevel(logging.WARNING)

    # Process the command line
    (s_date, e_date) = parse_commandline()

    try:
        # Create the processor object
        processor = NARR_AuxProcessor(s_date, e_date)

        # Call the main processing routine
        processor.archive_aux_data()
    except Exception:
        logger.exception('Processing Failed')
        sys.exit(1)  # EXIT FAILURE

    sys.exit(0)  # EXIT SUCCESS
def main():
    '''
    Description:
        Ensures all data between start_date and end_date are up to date.

    Precondition:
        start_date and end_date are of type datetime.datetime
        start_date and end_date can also be of type datetime.date
    '''

    # The config file is located in the same place as this script
    Config.read_config(os.path.dirname(__file__))

    cmd_args = parse_arguments()

    setup_logging(cmd_args.debug, cmd_args.verbose)
    logger = logging.getLogger(__name__)

    try:
        # Determine the data that exists within the date range
        data = NarrData.get_next_narr_data_gen(cmd_args.start_date,
                                               cmd_args.end_date)

        # Determine which files are stale or missing internally.
        # Materialize the result so it can be counted and reused
        # (in Python 3, filter() returns a single-use iterator).
        data_to_be_updated = [item for item in data
                              if item.need_to_update()]

        if len(data_to_be_updated) == 0:
            logger.info('No data found for updating archive')
        else:
            logger.info('Will download {0} files'
                        .format(len(data_to_be_updated)))

        if cmd_args.report:
            report(data_to_be_updated)
        else:
            update(data_to_be_updated)
    except Exception:
        logger.exception('Processing Failed')
        sys.exit(1)  # EXIT FAILURE

    sys.exit(0)  # EXIT SUCCESS
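# main() calls a setup_logging() helper that is not shown in this section.
# A plausible sketch, given the debug/verbose flags it receives, follows;
# this is an assumption, not the project's actual implementation.
import logging
import sys

def setup_logging(debug, verbose):
    '''Configure root logging to STDOUT at a flag-driven level (sketch)'''
    if debug:
        level = logging.DEBUG
    elif verbose:
        level = logging.INFO
    else:
        level = logging.WARNING
    logging.basicConfig(format=('%(asctime)s %(levelname)-8s'
                                ' %(message)s'),
                        datefmt='%Y-%m-%d %H:%M:%S',
                        level=level,
                        stream=sys.stdout)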
def get_url(filename):
    '''Return the URL for external retrieval of the file'''
    return Config.get('ncep.url_format').format(filename)
def get_arch_dir(cls, year, month, day):
    '''Return the archive directory path for the given date'''
    return (Config.get('archive_directory_format')
            .format(cls.get_base_aux_dir(), year, month, day))
def parse_arguments():
    '''
    Description:
        Parses arguments from the command line.
    '''

    version_number = Version.version_number()
    default_date_range = int(Config.get('default_date_range'))

    # Create a command line argument parser
    description = ('Downloads LST auxiliary inputs, then archives them for'
                   ' future use.  Dates must be in the format: "YYYYMMDD"')
    parser = ArgumentParser(description=description,
                            formatter_class=ArgumentDefaultsHelpFormatter)

    # ---- Add parameters ----
    parser.add_argument('--start-date',
                        action='store', dest='start_date',
                        metavar='YYYYMMDD', type=input_date_validation,
                        required=False,
                        default=(date.today() -
                                 timedelta(days=default_date_range)),
                        help='The start date of the date range of auxiliary'
                             ' data to download.')

    parser.add_argument('--end-date',
                        action='store', dest='end_date',
                        metavar='YYYYMMDD', type=input_date_validation,
                        required=False,
                        default=date.today(),
                        help='The end date of the date range of auxiliary'
                             ' data to download.')

    parser.add_argument('--date',
                        action='store', dest='date',
                        metavar='YYYYMMDD', type=input_date_validation,
                        required=False,
                        help='Sets both start and end date to this date.'
                             ' Overrides start-date and end-date arguments.')

    parser.add_argument('--report',
                        action='store_true', dest='report',
                        default=False,
                        help='Only report what will happen.')

    parser.add_argument('--verbose',
                        action='store_true', dest='verbose',
                        default=False,
                        help='Turn verbose logging on.')

    parser.add_argument('--debug',
                        action='store_true', dest='debug',
                        default=False,
                        help='Turn debug logging on.')

    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s {0}'.format(version_number),
                        help='Displays the version of the software.')

    # Parse the command line parameters
    args = parser.parse_args()

    # Check if a date was specified.  If so, override start and end.
    if args.date is not None:
        args.start_date = args.date
        args.end_date = args.date

    return args
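# parse_arguments() relies on an input_date_validation callable that is not
# shown in this section.  A minimal sketch of what such a validator might
# look like follows; this is an assumption, not the project's actual
# implementation.
from argparse import ArgumentTypeError
from datetime import datetime

def input_date_validation(value):
    '''Convert a "YYYYMMDD" string to a date, or raise a parser error'''
    try:
        return datetime.strptime(value, '%Y%m%d').date()
    except ValueError:
        # argparse reports ArgumentTypeError as a clean usage error
        raise ArgumentTypeError('Dates must be in the format: "YYYYMMDD"')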
def get_arch_filename(cls, variable, year, month, day, hour, ext):
    '''Return the archive filename for the given variable, date, hour,
    and extension'''
    return (Config.get('archive_name_format')
            .format(variable, year, month, day, hour * 100, ext))
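# Composition sketch for the archive helpers above.  The real layouts come
# from the 'archive_directory_format' and 'archive_name_format' config
# entries; the format strings below are hypothetical stand-ins used only to
# show how the pieces join into a full archive path.
import os

def arch_path_sketch(base_aux_dir, variable, year, month, day, hour):
    '''Standalone mirror of get_arch_dir() + get_arch_filename()'''
    dir_fmt = '{0}/{1:04d}/{2:02d}/{3:02d}'                  # hypothetical
    name_fmt = 'NARR_{0}.{1:04d}{2:02d}{3:02d}.{4:04d}.{5}'  # hypothetical
    arch_dir = dir_fmt.format(base_aux_dir, year, month, day)
    arch_name = name_fmt.format(variable, year, month, day,
                                hour * 100, 'grb')
    return os.path.join(arch_dir, arch_name)

# arch_path_sketch('/data/aux', 'HGT', 2015, 1, 28, 3)
# -> '/data/aux/2015/01/28/NARR_HGT.20150128.0300.grb'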
def get_filename(year, month, day, hour):
    '''Return the filename to grab on the external system'''
    fmt = Config.get('ncep.name_format')
    return fmt.format(year, month, day, hour)
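# Composition sketch for the NCEP helpers above.  The 'ncep.name_format'
# and 'ncep.url_format' config entries are site-specific; the format
# strings below are hypothetical placeholders chosen only to show how
# get_filename() feeds get_url().
def build_ncep_url_sketch(year, month, day, hour):
    '''Standalone mirror of get_filename() + get_url() with
    hypothetical format strings'''
    name_fmt = '{0:04d}{1:02d}{2:02d}{3:02d}.grb'  # hypothetical
    url_fmt = 'https://example.com/ncep/{0}'       # hypothetical
    filename = name_fmt.format(year, month, day, hour)
    return url_fmt.format(filename)

# build_ncep_url_sketch(2015, 1, 28, 6)
# -> 'https://example.com/ncep/2015012806.grb'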
def process_grib_for_variable(self, variable, grib_file):
    '''
    Description:
        Extract the specified variable from the grib file and archive it.
    '''

    self.logger.info('Processing [{0}]'.format(grib_file))

    # Get the date information from the grib file
    # (the second dot-delimited field is expected to be YYYYMMDDHH)
    parts = grib_file.split('.')
    year = int(parts[1][:4])
    month = int(parts[1][4:6])
    day = int(parts[1][6:8])
    hour = int(parts[1][8:])

    # Figure out the filenames to create
    hdr_name = (Config.get('archive_name_format')
                .format(variable, year, month, day, hour * 100, 'hdr'))
    grb_name = (Config.get('archive_name_format')
                .format(variable, year, month, day, hour * 100, 'grb'))

    # Create inventory/header file to extract the variable data
    cmd = ' '.join(['wgrib', grib_file, '|', 'grep', variable,
                    '>', hdr_name])
    self.logger.info('Executing [{0}]'.format(cmd))
    output = System.execute_cmd(cmd)
    if output is not None and len(output) > 0:
        self.logger.info(output)

    # Create the grib file for the variable
    cmd = ' '.join(['cat', hdr_name, '|', 'wgrib', grib_file,
                    '-i', '-grib', '-o', grb_name])
    self.logger.info('Executing [{0}]'.format(cmd))
    output = System.execute_cmd(cmd)
    if output is not None and len(output) > 0:
        self.logger.info(output)

    # Create new inventory/header file for the variable
    cmd = ' '.join(['wgrib', grb_name, '|', 'grep', variable,
                    '>', hdr_name])
    self.logger.info('Executing [{0}]'.format(cmd))
    output = System.execute_cmd(cmd)
    if output is not None and len(output) > 0:
        self.logger.info(output)

    # Determine the directory to place the data and create it if it does
    # not exist
    dest_path = (Config.get('archive_directory_format')
                 .format(self.base_aux_dir, year, month, day))
    System.create_directory(dest_path)

    # Archive the files
    self.logger.info('Archiving into [{0}]'.format(dest_path))
    # GRIB
    dest_file = os.path.join(dest_path, grb_name)
    shutil.copyfile(grb_name, dest_file)
    # HEADER
    dest_file = os.path.join(dest_path, hdr_name)
    shutil.copyfile(hdr_name, dest_file)

    # Cleanup the working directory
    if os.path.exists(grb_name):
        os.unlink(grb_name)
    if os.path.exists(hdr_name):
        os.unlink(hdr_name)
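# A hedged, regex-based version of the date parsing at the top of
# process_grib_for_variable().  It assumes only that the filename's second
# dot-delimited field is a YYYYMMDDHH timestamp; the sample name below is
# illustrative, not a confirmed naming convention.
import re

def parse_grib_timestamp(grib_file):
    '''Return (year, month, day, hour) parsed from a grib filename'''
    match = re.match(r'^[^.]+\.(\d{4})(\d{2})(\d{2})(\d{2})\.', grib_file)
    if match is None:
        raise ValueError('Unexpected grib filename: {0}'.format(grib_file))
    return tuple(int(part) for part in match.groups())

# parse_grib_timestamp('merged_AWIP32.2015012803.RS.sfc')  # illustrative
# -> (2015, 1, 28, 3)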