def standalone_main():
    """Command-line entry point for splicing HDF5 products.

    Parses arguments, configures logging, loads the input file list and the
    optional sounding-id / dataset filters, then delegates the actual work
    to process_files().
    """
    parser = ArgumentParser(
        description="Splices together input HDF5 products for a given set of sounding ids")

    parser.add_argument("filenames", metavar="FILE", nargs='*',
                        help="files to splice, may be left blank if using the -i --input-files-list option")

    parser.add_argument("-i", "--input-files-list", dest="input_files_list", metavar="FILE",
                        help="text file with input filenames to splice")

    parser.add_argument("-s", "--sounding-id-file", dest="sounding_id_file", metavar="FILE",
                        help="file containing list of soundings for destination file")

    parser.add_argument("-o", "--output-file", dest="output_filename", metavar="FILE",
                        default=DEFAULT_OUTPUT_FILENAME,
                        help="output filename of splice data, default: %s" % DEFAULT_OUTPUT_FILENAME)

    parser.add_argument("-d", "--datasets-list-file", dest="datasets_list_file", metavar="FILE",
                        help="file containing list of only datasets to consider for copying. If rename_mapping is enabled then the names are matched on their destination dataset name")

    parser.add_argument("-r", "--rename-mapping", dest="rename_mapping",
                        action="store_true", default=False,
                        help="rename datasets into output file according to internal mapping table as they would appear in the L2Agg PGE")

    parser.add_argument("-w", "--workers", dest="workers", type=int, default=1,
                        help="Number of workers to use when parallelizing splicing")

    parser.add_argument("--temp", dest="temp_dir", default=os.curdir,
                        help="Directory where temporary files are saved when number of parallel workers is greater than 1")

    parser.add_argument("-l", "--log_file", dest="log_file",
                        help="Save verbose information to log file")

    parser.add_argument("--agg-names-filter", dest="agg_names_filter",
                        action="store_true", default=False,
                        help="include only dataset names that would appear in the L2Agg PGE. It only makes sense to use this option with --rename_mapping")

    parser.add_argument("--splice-all", dest="splice_all",
                        action="store_true", default=False,
                        help="splice all datasets, including those which do not have a sounding dimension. Note that datasets without an explicit handler and no sounding dimension are simply copied from the first file.")

    # Tri-state flag: default None means "auto-detect"; the two options below
    # share dest= and force True / False respectively.
    parser.add_argument("--multiple-file-types", dest="multi_source_types",
                        action="store_true", default=None,
                        help="indicates that multiple file type sources are being spliced. Speeds up multiple source type determination stage by being specified.")

    parser.add_argument("--single-file-type", dest="multi_source_types",
                        action="store_false", default=None,
                        help="indicates that a single type of file is being spliced. Speeds up multiple source type determination stage by being specified.")

    parser.add_argument("-v", "--verbose", dest="verbose",
                        action="store_true", default=False,
                        help="enable verbose informational reporting")

    # Parse command line arguments
    args = parser.parse_args()

    if len(args.filenames) == 0 and args.input_files_list is None:
        parser.error("Input list file must be specified")

    # Set up logging
    if args.verbose:
        # Include HDF5 errors in output
        h5py._errors.unsilence_errors()
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    main_logger = log_util.init_logging(log_level=log_level, format="%(message)s")

    # Initialize optional verbose log file
    if args.log_file:
        log_file = log_util.open_log_file(args.log_file, logger=main_logger)
        log_file.setFormatter(
            logging.Formatter("%(asctime)s: %(name)8s - %(levelname)7s - %(message)s"))
    else:
        log_file = None

    source_files = load_source_files(args.filenames, args.input_files_list)

    if args.sounding_id_file is not None:
        # Use a context manager so the file is closed promptly instead of
        # relying on garbage collection.
        with open(args.sounding_id_file) as sid_file:
            sounding_ids = [sid.strip() for sid in sid_file]
    else:
        main_logger.debug("No sounding ids file supplied, aggregating all ids from all files")
        sounding_ids = None

    copy_datasets_list = None
    if args.datasets_list_file is not None:
        with open(args.datasets_list_file) as ds_file:
            copy_datasets_list = [ds.strip() for ds in ds_file]

    if args.agg_names_filter:
        if copy_datasets_list is None:
            # Copy so later in-place additions cannot mutate the module-level list
            copy_datasets_list = list(aggregator_dataset_dest_names)
        else:
            copy_datasets_list += aggregator_dataset_dest_names

    process_files(source_files, args.output_filename, sounding_ids,
                  splice_all=args.splice_all,
                  desired_datasets_list=copy_datasets_list,
                  rename_mapping=args.rename_mapping,
                  multi_source_types=args.multi_source_types,
                  workers=args.workers,
                  temp_dir=args.temp_dir,
                  main_logger=main_logger,
                  log_file=log_file)
def standalone_main():
    """Command-line entry point for splicing HDF5 products.

    Parses arguments, configures logging, loads the input file list and the
    optional sounding-id / dataset filters, then delegates the actual work
    to process_files().
    """
    parser = ArgumentParser(
        description="Splices together input HDF5 products for a given set of sounding ids")

    parser.add_argument("filenames", metavar="FILE", nargs='*',
                        help="files to splice, may be left blank if using the -i --input-files-list option")

    parser.add_argument("-i", "--input-files-list", dest="input_files_list", metavar="FILE",
                        help="text file with input filenames to splice")

    parser.add_argument("-s", "--sounding-id-file", dest="sounding_id_file", metavar="FILE",
                        help="file containing list of soundings for destination file")

    parser.add_argument("-o", "--output-file", dest="output_filename", metavar="FILE",
                        default=DEFAULT_OUTPUT_FILENAME,
                        help="output filename of splice data, default: %s" % DEFAULT_OUTPUT_FILENAME)

    parser.add_argument("-d", "--datasets-list-file", dest="datasets_list_file", metavar="FILE",
                        help="file containing list of only datasets to consider for copying. If rename_mapping is enabled then the names are matched on their destination dataset name")

    parser.add_argument("-r", "--rename-mapping", dest="rename_mapping",
                        action="store_true", default=False,
                        help="rename datasets into output file according to internal mapping table as they would appear in the L2Agg PGE")

    parser.add_argument("-w", "--workers", dest="workers", type=int, default=1,
                        help="Number of workers to use when parallelizing splicing")

    parser.add_argument("--temp", dest="temp_dir", default=os.curdir,
                        help="Directory where temporary files are saved when number of parallel workers is greater than 1")

    parser.add_argument("-l", "--log_file", dest="log_file",
                        help="Save verbose information to log file")

    parser.add_argument("--agg-names-filter", dest="agg_names_filter",
                        action="store_true", default=False,
                        help="include only dataset names that would appear in the L2Agg PGE. It only makes sense to use this option with --rename_mapping")

    parser.add_argument("--splice-all", dest="splice_all",
                        action="store_true", default=False,
                        help="splice all datasets, including those which do not have a sounding dimension. Note that datasets without an explicit handler and no sounding dimension are simply copied from the first file.")

    # Tri-state flag: default None means "auto-detect"; the two options below
    # share dest= and force True / False respectively.
    parser.add_argument("--multiple-file-types", dest="multi_source_types",
                        action="store_true", default=None,
                        help="indicates that multiple file type sources are being spliced. Speeds up multiple source type determination stage by being specified.")

    parser.add_argument("--single-file-type", dest="multi_source_types",
                        action="store_false", default=None,
                        help="indicates that a single type of file is being spliced. Speeds up multiple source type determination stage by being specified.")

    parser.add_argument("-v", "--verbose", dest="verbose",
                        action="store_true", default=False,
                        help="enable verbose informational reporting")

    # Parse command line arguments
    args = parser.parse_args()

    if len(args.filenames) == 0 and args.input_files_list is None:
        parser.error("Input list file must be specified")

    # Set up logging
    if args.verbose:
        # Include HDF5 errors in output
        h5py._errors.unsilence_errors()
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    main_logger = log_util.init_logging(log_level=log_level, format="%(message)s")

    # Initialize optional verbose log file
    if args.log_file:
        log_file = log_util.open_log_file(args.log_file, logger=main_logger)
        log_file.setFormatter(
            logging.Formatter("%(asctime)s: %(name)8s - %(levelname)7s - %(message)s"))
    else:
        log_file = None

    source_files = load_source_files(args.filenames, args.input_files_list)

    if args.sounding_id_file is not None:
        # Use a context manager so the file is closed promptly instead of
        # relying on garbage collection.
        with open(args.sounding_id_file) as sid_file:
            sounding_ids = [sid.strip() for sid in sid_file]
    else:
        main_logger.debug("No sounding ids file supplied, aggregating all ids from all files")
        sounding_ids = None

    copy_datasets_list = None
    if args.datasets_list_file is not None:
        with open(args.datasets_list_file) as ds_file:
            copy_datasets_list = [ds.strip() for ds in ds_file]

    if args.agg_names_filter:
        if copy_datasets_list is None:
            # Copy so later in-place additions cannot mutate the module-level list
            copy_datasets_list = list(aggregator_dataset_dest_names)
        else:
            copy_datasets_list += aggregator_dataset_dest_names

    process_files(source_files, args.output_filename, sounding_ids,
                  splice_all=args.splice_all,
                  desired_datasets_list=copy_datasets_list,
                  rename_mapping=args.rename_mapping,
                  multi_source_types=args.multi_source_types,
                  workers=args.workers,
                  temp_dir=args.temp_dir,
                  main_logger=main_logger,
                  log_file=log_file)
to stdout ''' args = docopt_simple(usage, version=version) # Logger for file operations logger = logging.getLogger(os.path.basename(__file__)) if args.quiet: log_util.init_logging(logging.ERROR) else: log_util.init_logging(logging.INFO) # Initialize logging if args.log_file: log_obj = log_util.open_log_file(args.log_file) log_obj.setFormatter( logging.Formatter( "%(asctime)s: %(name)25s - %(levelname)7s - %(message)s")) else: log_obj = None populate_options = {} if (args.binary): populate_options["l2_binary_filename"] = args.binary if (args.l2_config): populate_options["l2_config_filename"] = args.l2_config populate_options["aggregate"] = args.aggregate populate_options["abscoversion"] = args.absco_version populate_options["target_cluster"] = args.target_cluster populate_options["group_size"] = int(args.group_size) populate_options["parallel_size"] = int(args.parallel_size)
to stdout """ args = docopt_simple(usage, version=version) # Logger for file operations logger = logging.getLogger(os.path.basename(__file__)) if args.quiet: log_util.init_logging(logging.ERROR) else: log_util.init_logging(logging.INFO) # Initialize logging if args.log_file: log_obj = log_util.open_log_file(args.log_file) log_obj.setFormatter(logging.Formatter("%(asctime)s: %(name)25s - %(levelname)7s - %(message)s")) else: log_obj = None populate_options = {} if args.binary: populate_options["l2_binary_filename"] = args.binary if args.l2_config: populate_options["l2_config_filename"] = args.l2_config populate_options["aggregate"] = args.aggregate populate_options["abscoversion"] = args.absco_version populate_options["target_cluster"] = args.target_cluster populate_options["group_size"] = int(args.group_size) populate_options["parallel_size"] = int(args.parallel_size) for config_file in args.config_file: