# Standard-library and h5py imports needed by this entry point. The helpers
# log_util, load_source_files, process_files, aggregator_dataset_dest_names and
# DEFAULT_OUTPUT_FILENAME are assumed to be defined in the enclosing splice module.
import os
import logging
from argparse import ArgumentParser

import h5py


def standalone_main():
    parser = ArgumentParser(description="Splices together input HDF5 products for a given set of sounding ids")

    parser.add_argument( "filenames", metavar="FILE", nargs='*',
                         help="files to splice, may be left blank if using the -i --input-files-list option" )

    parser.add_argument( "-i", "--input-files-list", dest="input_files_list",
                         metavar="FILE",
                         help="text file with input filenames to splice")

    parser.add_argument( "-s", "--sounding-id-file", dest="sounding_id_file",
                         metavar="FILE",
                         help="file containing list of soundings for destination file")

    parser.add_argument( "-o", "--output-file", dest="output_filename",
                         metavar="FILE", default=DEFAULT_OUTPUT_FILENAME,
                         help="output filename of splice data, default: %s" % DEFAULT_OUTPUT_FILENAME)

    parser.add_argument( "-d", "--datasets-list-file", dest="datasets_list_file",
                         metavar="FILE",
                         help="file containing list of only datasets to consider for copying. If rename_mapping is enabled then the names are matched on their destination dataset name")

    parser.add_argument( "-r", "--rename-mapping", dest="rename_mapping",
                         action="store_true",
                         default=False,
                         help="rename datasets into output file according to internal mapping table as they would appear in the L2Agg PGE")

    parser.add_argument( "-w", "--workers", dest="workers", type=int, default=1,
                         help="Number of workers to use when parallelizing splicing" )

    parser.add_argument( "--temp", dest="temp_dir", default=os.curdir,
                         help="Directory where temporary files are saved when number of parallel workers is greater than 1" )

    parser.add_argument( "-l", "--log_file", dest="log_file", 
                         help="Save verbose information to log file" )

    parser.add_argument( "--agg-names-filter", dest="agg_names_filter",
                         action="store_true",
                         default=False,
                         help="include only dataset names that would appear in the L2Agg PGE. Its only makes sense to use this option with --rename_mapping")

    parser.add_argument( "--splice-all", dest="splice_all",
                         action="store_true",
                         default=False,
                         help="splice all datasets, including those which do not have a sounding dimension. Note that datasets without an explicit handler and no sounding dimension are simply copied from the first file.")

    parser.add_argument( "--multiple-file-types", dest="multi_source_types", action="store_true", default=None,
                         help="indicates that multiple file type sources are being spliced. Speeds up multiple source type determination stage by being specified." )

    parser.add_argument( "--single-file-type", dest="multi_source_types", action="store_false", default=None,
                         help="indicates that a single type of file is being spliced. Speeds up multiple source type determination stage by being specified." )

    parser.add_argument( "-v", "--verbose", dest="verbose",
                         action="store_true",
                         default=False,
                         help="enable verbose informational reporting")

    # Parse command line arguments
    args = parser.parse_args()

    if len(args.filenames) == 0 and args.input_files_list is None:
        parser.error("Input files must be supplied as arguments or through the -i/--input-files-list option")

    # Set up logging
    if args.verbose:
        # Include HDF5 errors in output
        h5py._errors.unsilence_errors()

        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    main_logger = log_util.init_logging(log_level=log_level, format="%(message)s")
        
    # Initialize logging
    if args.log_file:
        log_file = log_util.open_log_file(args.log_file, logger=main_logger)
        log_file.setFormatter( logging.Formatter("%(asctime)s: %(name)8s - %(levelname)7s - %(message)s") )
    else:
        log_file = None

    source_files = load_source_files(args.filenames, args.input_files_list)

    if args.sounding_id_file is not None:
        with open(args.sounding_id_file) as sid_file:
            sounding_ids = [ sid.strip() for sid in sid_file ]
    else:
        main_logger.debug("No sounding ids file supplied, aggregating all ids from all files")
        sounding_ids = None

    copy_datasets_list = None
    if args.datasets_list_file is not None:
        with open(args.datasets_list_file) as ds_file:
            copy_datasets_list = [ ds.strip() for ds in ds_file ]

    if args.agg_names_filter:
        if copy_datasets_list is None:
            copy_datasets_list = aggregator_dataset_dest_names
        else:
            copy_datasets_list += aggregator_dataset_dest_names

    process_files(source_files,
                  args.output_filename,
                  sounding_ids,
                  splice_all=args.splice_all,
                  desired_datasets_list=copy_datasets_list,
                  rename_mapping=args.rename_mapping,
                  multi_source_types=args.multi_source_types,
                  workers=args.workers,
                  temp_dir=args.temp_dir,
                  main_logger=main_logger,
                  log_file=log_file)
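
# A minimal usage sketch (assumption: this function is the script's entry point;
# the script name and option values below are illustrative, not from the original):
#
#   python splice_product_files.py -i input_list.txt -s sounding_ids.txt \
#       -o spliced_output.h5 --rename-mapping -w 4
#
if __name__ == "__main__":
    standalone_main()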
      to stdout
"""

args = docopt_simple(usage, version=version)

# Logger for file operations
logger = logging.getLogger(os.path.basename(__file__))

if args.quiet:
    log_util.init_logging(logging.ERROR)
else:
    log_util.init_logging(logging.INFO)

# Initialize logging
if args.log_file:
    log_obj = log_util.open_log_file(args.log_file)
    log_obj.setFormatter(logging.Formatter("%(asctime)s: %(name)25s - %(levelname)7s - %(message)s"))
else:
    log_obj = None
populate_options = {}
if args.binary:
    populate_options["l2_binary_filename"] = args.binary
if args.l2_config:
    populate_options["l2_config_filename"] = args.l2_config
populate_options["aggregate"] = args.aggregate
populate_options["abscoversion"] = args.absco_version
populate_options["target_cluster"] = args.target_cluster
populate_options["group_size"] = int(args.group_size)
populate_options["parallel_size"] = int(args.parallel_size)

for config_file in args.config_file: