def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Register this command's command-line arguments on the parser.

    Args:
        parser (KGTKArgumentParser): receives the positional input file,
            the output file, and the ID builder, reader, and value options.
        parsed_shared_args (Namespace): previously parsed shared arguments;
            only `_expert` is read here.
    """
    # The KGTK modules are imported locally, inside the function.
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.reshape.kgtkidbuilder import KgtkIdBuilder, KgtkIdBuilderOptions
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    _expert: bool = parsed_shared_args._expert

    def h(msg: str) -> str:
        # Outside expert mode the help text is replaced with SUPPRESS: the
        # option still exists and is initialized, it is just not listed.
        return msg if _expert else SUPPRESS

    parser.add_input_file(positional=True)
    parser.add_output_file()

    # The ID builder options are always registered in expert mode so that
    # all of them are shown.
    KgtkIdBuilderOptions.add_arguments(parser, expert=True)

    # The remaining shared option groups follow the caller's expert setting.
    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Register this command's command-line arguments on the parser.

    Args:
        parser (KGTKArgumentParser): receives the optional positional input
            file, the -o/--output-file option, and the ID builder, reader,
            and value options.
        parsed_shared_args (Namespace): previously parsed shared arguments;
            only `_expert` is read here.
    """
    _expert: bool = parsed_shared_args._expert

    def h(msg: str) -> str:
        # Outside expert mode the help text is replaced with SUPPRESS: the
        # option still exists and is initialized, it is just not listed.
        return msg if _expert else SUPPRESS

    # Positional input file; it may be omitted, in which case "-" is used.
    parser.add_argument(
        "input_kgtk_file",
        nargs="?",
        type=Path,
        default="-",
        help="The KGTK file to filter. May be omitted or '-' for stdin (default=%(default)s).",
    )

    parser.add_argument(
        "-o",
        "--output-file",
        dest="output_kgtk_file",
        type=Path,
        default="-",
        help="The KGTK file to write (default=%(default)s).",
    )

    # The ID builder options are always registered in expert mode so that
    # all of them are shown.
    KgtkIdBuilderOptions.add_arguments(parser, expert=True)

    # The remaining shared option groups follow the caller's expert setting.
    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Register the ntriples import command's arguments on the parser.

    Args:
        parser (KGTKArgumentParser): receives the input/output file options,
            the reject and namespace file options, and the ntriples,
            ID builder, reader, and value options.
        parsed_shared_args (Namespace): previously parsed shared arguments;
            `_expert` and `_mode` are read here.
    """
    from kgtk.imports.kgtkntriples import KgtkNtriples
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions, KgtkReaderMode
    from kgtk.reshape.kgtkidbuilder import KgtkIdBuilder, KgtkIdBuilderOptions
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    _expert: bool = parsed_shared_args._expert

    def h(msg: str) -> str:
        # Outside expert mode the help text is replaced with SUPPRESS: the
        # option still exists and is initialized, it is just not listed.
        return msg if _expert else SUPPRESS

    # Primary input (a list of files is allowed) and primary output.
    parser.add_input_file(who="The ntriples file(s) to import.", allow_list=True)
    parser.add_output_file()

    # Optional auxiliary files.
    parser.add_output_file(
        who="The ntriples output file for records that are rejected.",
        dest="reject_file",
        options=["--reject-file"],
        metavar="REJECT_FILE",
        optional=True,
    )
    parser.add_input_file(
        who="The KGTK input file with known namespaces.",
        dest="namespace_file",
        options=["--namespace-file"],
        metavar="NAMESPACE_FILE",
        optional=True,
    )
    parser.add_output_file(
        who="The KGTK output file with updated namespaces.",
        dest="updated_namespace_file",
        options=["--updated-namespace-file"],
        metavar="NAMESPACE_FILE",
        optional=True,
    )

    KgtkNtriples.add_arguments(parser)
    KgtkIdBuilderOptions.add_arguments(parser)
    KgtkReader.add_debug_arguments(parser, expert=_expert)

    # The default reader mode is looked up by name from the shared arguments.
    default_reader_mode = KgtkReaderMode[parsed_shared_args._mode]
    KgtkReaderOptions.add_arguments(
        parser,
        mode_options=True,
        default_mode=default_reader_mode,
        expert=_expert,
    )
    KgtkValueOptions.add_arguments(parser)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Register the compaction command's arguments on the parser.

    Args:
        parser (KGTKArgumentParser): receives the input/output file arguments,
            the compaction options, and the ID builder, reader, and value
            options.
        parsed_shared_args (Namespace): previously parsed shared arguments;
            only `_expert` is read here.
    """
    _expert: bool = parsed_shared_args._expert

    def h(msg: str) -> str:
        # Outside expert mode the help text is replaced with SUPPRESS: the
        # option still exists and is initialized, it is just not listed.
        return msg if _expert else SUPPRESS

    def add_flag(option: str, dest: str, help_text: str, default: bool) -> None:
        # Every boolean option here shares the same shape: an optional
        # value parsed by optional_bool, with a bare flag meaning True.
        parser.add_argument(
            option,
            dest=dest,
            help=help_text,
            type=optional_bool,
            nargs='?',
            const=True,
            default=default,
        )

    parser.add_argument(
        "input_kgtk_file",
        nargs="?",
        type=Path,
        default="-",
        help="The KGTK file to filter. May be omitted or '-' for stdin (default=%(default)s).",
    )

    parser.add_argument(
        "--columns",
        dest="key_column_names",
        nargs='+',
        default=[],
        help="The key columns to identify records for compaction. "
        + "(default=id for node files, (node1, label, node2, id) for edge files).",
    )

    add_flag(
        "--compact-id",
        "compact_id",
        "Indicate that the ID column in KGTK edge files should be compacted. "
        + "Normally, if the ID column exists, it is not compacted, "
        + "as there are use cases that need to maintain distinct lists of secondary edges for each ID value. (default=%(default)s).",
        False,
    )

    add_flag(
        "--presorted",
        "sorted_input",
        "Indicate that the input has been presorted (or at least pregrouped) (default=%(default)s).",
        False,
    )

    add_flag(
        "--verify-sort",
        "verify_sort",
        "If the input has been presorted, verify its consistency (disable if only pregrouped). (default=%(default)s).",
        True,
    )

    parser.add_argument(
        "-o",
        "--output-file",
        dest="output_kgtk_file",
        type=Path,
        default="-",
        help="The KGTK file to write (default=%(default)s).",
    )

    add_flag(
        "--build-id",
        "build_id",
        "Build id values in an id column. (default=%(default)s).",
        False,
    )

    KgtkIdBuilderOptions.add_arguments(parser, expert=_expert)
    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
def main():
    """
    Test the KGTK compact processor.

    Parses the command line, optionally echoes the resulting options to the
    error stream, then builds a KgtkCompact object and runs it.

    TODO: Support the list output file.
    """
    parser: ArgumentParser = ArgumentParser()

    def add_flag(option: str, dest: str, help_text: str, default: bool, **extra) -> None:
        # Every boolean option here shares the same shape: an optional
        # value parsed by optional_bool, with a bare flag meaning True.
        parser.add_argument(
            option,
            dest=dest,
            help=help_text,
            type=optional_bool,
            nargs='?',
            const=True,
            default=default,
            **extra,
        )

    parser.add_argument(
        dest="input_file_path",
        help="The KGTK file with the input data (default=%(default)s)",
        type=Path,
        nargs="?",
        default="-",
    )

    parser.add_argument(
        "--columns",
        dest="key_column_names",
        nargs='+',
        default=[],
        help="The key columns to identify records for compaction. "
        + "(default=id for node files, (node1, label, node2, id) for edge files).",
    )

    parser.add_argument(
        "--keep-first",
        dest="keep_first_names",
        nargs='+',
        default=[],
        help="If compaction results in a list of values for any column on this list, keep only the first value after sorting. "
        + "(default=none).",
    )

    add_flag(
        "--compact-id",
        "compact_id",
        "Indicate that the ID column in KGTK edge files should be compacted. "
        + "Normally, if the ID column exists, it is not compacted, "
        + "as there are use cases that need to maintain distinct lists of secondary edges for each ID value. (default=%(default)s).",
        False,
    )

    add_flag(
        "--deduplicate",
        "deduplicate",
        "Treat all columns as key columns, overriding --columns and --compact-id. "
        + "This will remove completely duplicate records without compacting any new lists. "
        + "(default=%(default)s).",
        False,
        metavar="True|False",
    )

    add_flag(
        "--presorted",
        "sorted_input",
        "Indicate that the input has been presorted (or at least pregrouped). (default=%(default)s).",
        False,
    )

    add_flag(
        "--verify-sort",
        "verify_sort",
        "If the input has been presorted, verify its consistency (disable if only pregrouped). (default=%(default)s).",
        True,
    )

    add_flag(
        "--lists-in-input",
        "lists_in_input",
        "Assume that the input file may contain lists (disable when certain it does not). (default=%(default)s).",
        True,
    )

    add_flag(
        "--report-lists",
        "report_lists",
        "When True, report records with lists to the error output. (default=%(default)s).",
        False,
    )

    add_flag(
        "--exclude-lists",
        "exclude_lists",
        "When True, exclude records with lists from the output. (default=%(default)s).",
        False,
    )

    add_flag(
        "--output-only-lists",
        "output_only_lists",
        "When True, output only records containing lists. (default=%(default)s).",
        False,
    )

    parser.add_argument(
        "-o",
        "--output-file",
        dest="output_file_path",
        help="The KGTK file to write (default=%(default)s).",
        type=Path,
        default="-",
    )

    add_flag(
        "--build-id",
        "build_id",
        "Build id values in an id column. (default=%(default)s).",
        False,
    )

    KgtkIdBuilderOptions.add_arguments(parser)
    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    # Diagnostics go to stdout only when explicitly requested.
    error_file: typing.TextIO
    if args.errors_to_stdout:
        error_file = sys.stdout
    else:
        error_file = sys.stderr

    # Build the option structures.
    idbuilder_options: KgtkIdBuilderOptions = KgtkIdBuilderOptions.from_args(args)
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    # Show the final option structures for debugging and documentation.
    if args.show_options:
        option_lines = [
            "input: %s" % str(args.input_file_path),
            "--output-file=%s" % str(args.output_file_path),
            "--columns %s" % " ".join(args.key_column_names),
            "--keep-first %s" % " ".join(args.keep_first_names),
            "--compact-id=%s" % str(args.compact_id),
            "--deduplicate=%s" % str(args.deduplicate),
            "--presorted=%s" % str(args.sorted_input),
            "--verify-sort=%s" % str(args.verify_sort),
            "--lists-in-input=%s" % str(args.lists_in_input),
            "--report-lists=%s" % str(args.report_lists),
            "--exclude-lists=%s" % str(args.exclude_lists),
            "--output-only-lists=%s" % str(args.output_only_lists),
            "--build-id=%s" % str(args.build_id),
        ]
        for option_line in option_lines:
            print(option_line, file=error_file, flush=True)
        idbuilder_options.show(out=error_file)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    kc: KgtkCompact = KgtkCompact(
        input_file_path=args.input_file_path,
        key_column_names=args.key_column_names,
        keep_first_names=args.keep_first_names,
        compact_id=args.compact_id,
        deduplicate=args.deduplicate,
        sorted_input=args.sorted_input,
        verify_sort=args.verify_sort,
        lists_in_input=args.lists_in_input,
        report_lists=args.report_lists,
        exclude_lists=args.exclude_lists,
        output_only_lists=args.output_only_lists,
        output_file_path=args.output_file_path,
        build_id=args.build_id,
        idbuilder_options=idbuilder_options,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose,
    )
    kc.process()
def main():
    """
    Test the KGTK ntriples importer.

    Parses the command line, optionally echoes the resulting options to the
    error stream, then builds a KgtkNtriples object and runs it.
    """
    parser: ArgumentParser = ArgumentParser()

    # The default must be a list. argparse converts a *string* default with
    # `type`, so a default of "-" would yield a single Path rather than a
    # list of paths, and iterating over it would fail.
    parser.add_argument(
        "-i",
        "--input-files",
        dest="input_file_paths",
        nargs='*',
        help="The file(s) with the input ntriples data. (default=%(default)s)",
        type=Path,
        default=[Path("-")],
    )

    parser.add_argument(
        "-o",
        "--output-file",
        dest="output_file_path",
        help="The KGTK file to write (default=%(default)s).",
        type=Path,
        default="-",
    )

    parser.add_argument(
        "--reject-file",
        dest="reject_file_path",
        help="The KGTK file into which to write rejected records. (default=%(default)s).",
        type=Path,
        default=None,
    )

    parser.add_argument(
        "--namespace-file",
        dest="namespace_file_path",
        help="The KGTK file with known namespaces. (default=%(default)s).",
        type=Path,
        default=None,
    )

    parser.add_argument(
        "--updated-namespace-file",
        dest="updated_namespace_file_path",
        help="An updated KGTK file with known namespaces. (default=%(default)s).",
        type=Path,
        default=None,
    )

    KgtkNtriples.add_arguments(parser)
    KgtkIdBuilderOptions.add_arguments(parser)
    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    # Diagnostics go to stdout only when explicitly requested.
    error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr

    # Build the option structures.
    idbuilder_options: KgtkIdBuilderOptions = KgtkIdBuilderOptions.from_args(args)
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    def show(line: str) -> None:
        # Each option line is written to the error stream and flushed.
        print(line, file=error_file, flush=True)

    # Show the final option structures for debugging and documentation.
    if args.show_options:
        # The paths are read from the parsed arguments (`args`); there is
        # no local variable named input_file_paths in this function.
        show("--input-files %s" % " ".join([str(path) for path in args.input_file_paths]))
        show("--output-file=%s" % str(args.output_file_path))

        # The optional file options are shown only when they were supplied.
        if args.reject_file_path is not None:
            show("--reject-file=%s" % str(args.reject_file_path))
        if args.namespace_file_path is not None:
            show("--namespace-file=%s" % str(args.namespace_file_path))
        if args.updated_namespace_file_path is not None:
            show("--updated-namespace-file=%s" % str(args.updated_namespace_file_path))

        show("--namespace-id-prefix %s" % args.namespace_id_prefix)
        show("--namespace-id-use-uuid %s" % str(args.namespace_id_use_uuid))
        show("--namespace-id-counter %s" % str(args.namespace_id_counter))
        show("--namespace-id-zfill %s" % str(args.namespace_id_zfill))
        show("--output-only-used-namespaces %s" % str(args.output_only_used_namespaces))

        show("--allow-lax-uri %s" % str(args.allow_lax_uri))

        show("--local-namespace-prefix %s" % args.local_namespace_prefix)
        show("--local-namespace-use-uuid %s" % str(args.local_namespace_use_uuid))

        show("--prefix-expansion-label %s" % args.prefix_expansion_label)
        show("--structured-value-label %s" % args.structured_value_label)
        show("--structured-uri-label %s" % args.structured_uri_label)

        show("--newnode-prefix %s" % args.newnode_prefix)
        show("--newnode-use-uuid %s" % str(args.newnode_use_uuid))
        show("--newnode-counter %s" % str(args.newnode_counter))
        show("--newnode-zfill %s" % str(args.newnode_zfill))

        show("--build-id=%s" % str(args.build_id))
        show("--escape-pipes=%s" % str(args.escape_pipes))
        show("--validate=%s" % str(args.validate))

        if args.override_uuid is not None:
            show("--override_uuid=%s" % str(args.override_uuid))

        idbuilder_options.show(out=error_file)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    kn: KgtkNtriples = KgtkNtriples(
        input_file_paths=args.input_file_paths,
        output_file_path=args.output_file_path,
        reject_file_path=args.reject_file_path,
        namespace_file_path=args.namespace_file_path,
        updated_namespace_file_path=args.updated_namespace_file_path,
        namespace_id_prefix=args.namespace_id_prefix,
        namespace_id_use_uuid=args.namespace_id_use_uuid,
        namespace_id_counter=args.namespace_id_counter,
        namespace_id_zfill=args.namespace_id_zfill,
        output_only_used_namespaces=args.output_only_used_namespaces,
        newnode_prefix=args.newnode_prefix,
        newnode_use_uuid=args.newnode_use_uuid,
        newnode_counter=args.newnode_counter,
        newnode_zfill=args.newnode_zfill,
        allow_lax_uri=args.allow_lax_uri,
        local_namespace_prefix=args.local_namespace_prefix,
        local_namespace_use_uuid=args.local_namespace_use_uuid,
        prefix_expansion_label=args.prefix_expansion_label,
        structured_value_label=args.structured_value_label,
        structured_uri_label=args.structured_uri_label,
        build_id=args.build_id,
        escape_pipes=args.escape_pipes,
        idbuilder_options=idbuilder_options,
        validate=args.validate,
        override_uuid=args.override_uuid,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose,
    )
    kn.process()
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Register the implode command's arguments on the parser.

    Args:
        parser (KGTKArgumentParser): receives the input/output/reject file
            options, the implode options, and the ID builder, reader, and
            value options.
        parsed_shared_args (Namespace): previously parsed shared arguments;
            only `_expert` is read here.
    """
    from kgtk.kgtkformat import KgtkFormat
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.reshape.kgtkidbuilder import KgtkIdBuilder, KgtkIdBuilderOptions
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalue import KgtkValueFields
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    _expert: bool = parsed_shared_args._expert

    def h(msg: str) -> str:
        # Outside expert mode the help text is replaced with SUPPRESS: the
        # option still exists and is initialized, it is just not listed.
        return msg if _expert else SUPPRESS

    def add_flag(option: str, dest: str, help_text: str, default: bool) -> None:
        # Every boolean option here shares the same shape: an optional
        # value parsed by optional_bool, with a bare flag meaning True.
        parser.add_argument(
            option,
            dest=dest,
            help=help_text,
            type=optional_bool,
            nargs='?',
            const=True,
            default=default,
        )

    parser.add_input_file(positional=True)
    parser.add_output_file()
    parser.add_output_file(
        who="The KGTK file for records that are rejected.",
        dest="reject_file",
        options=["--reject-file"],
        metavar="REJECT_FILE",
        optional=True,
    )

    parser.add_argument(
        "--column",
        dest="column_name",
        default=KgtkFormat.NODE2,
        help="The name of the column to explode. (default=%(default)s).",
    )

    parser.add_argument(
        "--prefix",
        dest="prefix",
        default=KgtkFormat.NODE2 + ";" + KgtkFormat.KGTK_NAMESPACE,
        help="The prefix for exploded column names. (default=%(default)s).",
    )

    parser.add_argument(
        "--types",
        dest="type_names",
        nargs='*',
        choices=KgtkFormat.DataType.choices(),
        default=KgtkFormat.DataType.choices(),
        help="The KGTK data types for which fields should be imploded. (default=%(default)s).",
    )

    parser.add_argument(
        "--without",
        dest="without_fields",
        nargs='*',
        choices=KgtkValueFields.OPTIONAL_DEFAULT_FIELD_NAMES,
        default=None,
        help="The KGTK fields to do without. (default=%(default)s).",
    )

    add_flag(
        "--overwrite",
        "overwrite_column",
        "Indicate that it is OK to overwrite an existing imploded column. (default=%(default)s).",
        True,
    )

    add_flag(
        "--validate",
        "validate",
        "Validate imploded values. (default=%(default)s).",
        True,
    )

    add_flag(
        "--escape-pipes",
        "escape_pipes",
        "When true, pipe characters (|) need to be escaped (\\|) per KGTK file format. (default=%(default)s).",
        False,
    )

    add_flag(
        "--quantities-include-numbers",
        "quantities_include_numbers",
        "When true, numbers are acceptable quantities. (default=%(default)s).",
        True,
    )

    add_flag(
        "--general-strings",
        "general_strings",
        "When true, strings may include language qualified strings. (default=%(default)s).",
        True,
    )

    add_flag(
        "--remove-prefixed-columns",
        "remove_prefixed_columns",
        "When true, remove all columns beginning with the prefix from the output file. (default=%(default)s).",
        False,
    )

    add_flag(
        "--ignore-unselected-types",
        "ignore_unselected_types",
        "When true, input records with valid but unselected data types will be passed through to output. (default=%(default)s).",
        True,
    )

    add_flag(
        "--retain-unselected-types",
        "retain_unselected_types",
        "When true, input records with valid but unselected data types will be retain existing data on output. (default=%(default)s).",
        True,
    )

    add_flag(
        "--build-id",
        "build_id",
        "Build id values in an id column. (default=%(default)s).",
        False,
    )

    add_flag(
        "--show-data-types",
        "show_data_types",
        "Print the list of data types and exit. (default=%(default)s).",
        False,
    )

    add_flag(
        "--quiet",
        "quiet",
        "When true, suppress certain complaints unless verbose. (default=%(default)s).",
        False,
    )

    # The ID builder options are registered without an expert setting;
    # the other shared option groups follow the caller's expert setting.
    KgtkIdBuilderOptions.add_arguments(parser)
    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Register the compact/deduplicate command's arguments on the parser.

    When the shared `_command` equals DEDUP_COMMAND, the key-column options
    (--columns, --compact-id, --deduplicate, --lists-in-input, --keep-first)
    have their help text hidden unless expert mode is on, --deduplicate
    defaults to True, and --columns documents "all columns" as its default.
    Otherwise those options are always listed and --deduplicate defaults to
    False.

    Args:
        parser (KGTKArgumentParser): receives the file options, the
            compaction options, and the ID builder, reader, and value options.
        parsed_shared_args (Namespace): previously parsed shared arguments;
            `_command` and `_expert` are read here.
    """
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.reshape.kgtkidbuilder import KgtkIdBuilder, KgtkIdBuilderOptions
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    _command: str = parsed_shared_args._command
    _expert: bool = parsed_shared_args._expert

    def h(msg: str) -> str:
        # Outside expert mode the help text is replaced with SUPPRESS: the
        # option still exists and is initialized, it is just not listed.
        return msg if _expert else SUPPRESS

    dedup_mode: bool = _command == DEDUP_COMMAND

    def key_help(msg: str) -> str:
        # Key-column options are subject to help suppression only when
        # running as the dedup command.
        return h(msg) if dedup_mode else msg

    def add_flag(option: str, dest: str, help_text: str, default: bool, **extra) -> None:
        # Every boolean option here shares the same shape: an optional
        # value parsed by optional_bool, with a bare flag meaning True.
        parser.add_argument(
            option,
            dest=dest,
            help=help_text,
            type=optional_bool,
            nargs='?',
            const=True,
            default=default,
            **extra,
        )

    parser.add_input_file(positional=True)
    parser.add_output_file()
    parser.add_output_file(
        who="A KGTK output file that will contain only the rows containing lists."
        + " This file will have the same columns as the primary output file.",
        dest="list_output_file",
        options=["--list-output-file"],
        metavar="LIST_OUTPUT_FILE",
        optional=True,
    )

    # Only the documented default of --columns differs between the commands.
    if dedup_mode:
        columns_default_note = "(default=all columns)."
    else:
        columns_default_note = "(default=id for node files, (node1, label, node2, id) for edge files)."

    parser.add_argument(
        "--columns",
        dest="key_column_names",
        nargs='+',
        default=[],
        help=key_help("The key columns to identify records for compaction. " + columns_default_note),
    )

    add_flag(
        "--compact-id",
        "compact_id",
        key_help(
            "Indicate that the ID column in KGTK edge files should be compacted. "
            + "Normally, if the ID column exists, it is not compacted, "
            + "as there are use cases that need to maintain distinct lists of secondary edges for each ID value. (default=%(default)s)."
        ),
        False,
        metavar="True|False",
    )

    # Deduplication is on by default only for the dedup command.
    add_flag(
        "--deduplicate",
        "deduplicate",
        key_help(
            "Treat all columns as key columns, overriding --columns and --compact-id. "
            + "This will remove completely duplicate records without compacting any new lists. "
            + "(default=%(default)s)."
        ),
        True if dedup_mode else False,
        metavar="True|False",
    )

    add_flag(
        "--lists-in-input",
        "lists_in_input",
        key_help(
            "Assume that the input file may contain lists (disable when certain it does not). (default=%(default)s)."
        ),
        True,
    )

    parser.add_argument(
        "--keep-first",
        dest="keep_first_names",
        nargs='+',
        default=[],
        help=key_help(
            "If compaction results in a list of values for any column on this list, keep only the first value after sorting. "
            + "(default=none)."
        ),
    )

    # The remaining options are identical for both commands.
    add_flag(
        "--presorted",
        "sorted_input",
        "Indicate that the input has been presorted (or at least pregrouped) (default=%(default)s).",
        False,
        metavar="True|False",
    )

    add_flag(
        "--verify-sort",
        "verify_sort",
        "If the input has been presorted, verify its consistency (disable if only pregrouped). (default=%(default)s).",
        True,
        metavar="True|False",
    )

    add_flag(
        "--report-lists",
        "report_lists",
        "When True, report records with lists to the error output. (default=%(default)s).",
        False,
    )

    add_flag(
        "--exclude-lists",
        "exclude_lists",
        "When True, exclude records with lists from the output. (default=%(default)s).",
        False,
    )

    add_flag(
        "--output-only-lists",
        "output_only_lists",
        "When True, only records containing lists will be written to the primary output file. (default=%(default)s).",
        False,
    )

    add_flag(
        "--build-id",
        "build_id",
        "Build id values in an id column. (default=%(default)s).",
        False,
        metavar="True|False",
    )

    KgtkIdBuilderOptions.add_arguments(parser, expert=_expert)
    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Register the compaction command's arguments on the parser.

    Args:
        parser (KGTKArgumentParser): receives the positional input file, the
            output file, the compaction options, and the ID builder, reader,
            and value options.
        parsed_shared_args (Namespace): previously parsed shared arguments;
            only `_expert` is read here.
    """
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.reshape.kgtkidbuilder import KgtkIdBuilder, KgtkIdBuilderOptions
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    _expert: bool = parsed_shared_args._expert

    def h(msg: str) -> str:
        # Outside expert mode the help text is replaced with SUPPRESS: the
        # option still exists and is initialized, it is just not listed.
        return msg if _expert else SUPPRESS

    def add_flag(option: str, dest: str, help_text: str, default: bool, **extra) -> None:
        # Every boolean option here shares the same shape: an optional
        # value parsed by optional_bool, with a bare flag meaning True.
        parser.add_argument(
            option,
            dest=dest,
            help=help_text,
            type=optional_bool,
            nargs='?',
            const=True,
            default=default,
            **extra,
        )

    parser.add_input_file(positional=True)
    parser.add_output_file()

    parser.add_argument(
        "--columns",
        dest="key_column_names",
        nargs='+',
        default=[],
        help="The key columns to identify records for compaction. "
        + "(default=id for node files, (node1, label, node2, id) for edge files).",
    )

    add_flag(
        "--compact-id",
        "compact_id",
        "Indicate that the ID column in KGTK edge files should be compacted. "
        + "Normally, if the ID column exists, it is not compacted, "
        + "as there are use cases that need to maintain distinct lists of secondary edges for each ID value. (default=%(default)s).",
        False,
        metavar="True|False",
    )

    add_flag(
        "--presorted",
        "sorted_input",
        "Indicate that the input has been presorted (or at least pregrouped) (default=%(default)s).",
        False,
        metavar="True|False",
    )

    add_flag(
        "--verify-sort",
        "verify_sort",
        "If the input has been presorted, verify its consistency (disable if only pregrouped). (default=%(default)s).",
        True,
        metavar="True|False",
    )

    add_flag(
        "--lists-in-input",
        "lists_in_input",
        "Assume that the input file may contain lists (disable when certain it does not). (default=%(default)s).",
        True,
    )

    add_flag(
        "--build-id",
        "build_id",
        "Build id values in an id column. (default=%(default)s).",
        False,
        metavar="True|False",
    )

    KgtkIdBuilderOptions.add_arguments(parser, expert=_expert)
    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
def main():
    """
    Test the KGTK implode processor.

    Parses the command line, optionally echoes the resulting options to the
    error stream, then builds a KgtkImplode from them and runs it.
    """
    parser: ArgumentParser = ArgumentParser()

    parser.add_argument(dest="input_file_path", type=Path, nargs="?", default="-",
                        help="The KGTK file with the input data. (default=%(default)s)")

    parser.add_argument("--column", dest="column_name", default="node2",
                        help="The name of the column to explode. (default=%(default)s).")

    parser.add_argument("--types", dest="type_names", nargs='*',
                        choices=KgtkFormat.DataType.choices(),
                        default=KgtkFormat.DataType.choices(),
                        help="The KGTK data types for which fields should be imploded. (default=%(default)s).")

    parser.add_argument("--without", dest="without_fields", nargs='*',
                        choices=KgtkValueFields.OPTIONAL_DEFAULT_FIELD_NAMES,
                        default=None,
                        help="The KGTK fields to do without. (default=%(default)s).")

    parser.add_argument("-o", "--output-file", dest="output_file_path", type=Path, default="-",
                        help="The KGTK file to write (default=%(default)s).")

    parser.add_argument("--prefix", dest="prefix", default="node2;kgtk:",
                        help="The prefix for exploded column names. (default=%(default)s).")

    # The boolean switches share one shape: (flag, dest, help, default).
    # They are registered in this order so the help listing is stable.
    boolean_switches = (
        ("--overwrite", "overwrite_column",
         "Indicate that it is OK to overwrite an existing imploded column. (default=%(default)s).",
         True),
        ("--validate", "validate",
         "Validate imploded values. (default=%(default)s).",
         True),
        ("--escape-pipes", "escape_pipes",
         "When true, pipe characters (|) need to be escaped (\\|) per KGTK file format. (default=%(default)s).",
         False),
        ("--quantities-include-numbers", "quantities_include_numbers",
         "When true, numbers are acceptable quantities. (default=%(default)s).",
         True),
        ("--general-strings", "general_strings",
         "When true, strings may include language qualified strings. (default=%(default)s).",
         True),
        ("--remove-prefixed-columns", "remove_prefixed_columns",
         "When true, remove all columns beginning with the prefix from the output file. (default=%(default)s).",
         False),
        ("--ignore-unselected-types", "ignore_unselected_types",
         "When true, input records with valid but unselected data types will be passed through to output. (default=%(default)s).",
         True),
        ("--retain-unselected-types", "retain_unselected_types",
         "When true, input records with valid but unselected data types will be retain existing data on output. (default=%(default)s).",
         True),
        ("--build-id", "build_id",
         "Build id values in an id column. (default=%(default)s).",
         False),
    )
    for flag, dest_name, help_text, default_value in boolean_switches:
        parser.add_argument(flag, dest=dest_name, help=help_text,
                            type=optional_bool, nargs='?', const=True, default=default_value)

    parser.add_argument("--reject-file", dest="reject_file_path", type=Path, default=None,
                        help="The KGTK file into which to write rejected records (default=%(default)s).")

    KgtkIdBuilderOptions.add_arguments(parser)
    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    error_file: typing.TextIO
    if args.errors_to_stdout:
        error_file = sys.stdout
    else:
        error_file = sys.stderr

    # Build the option structures.
    idbuilder_options: KgtkIdBuilderOptions = KgtkIdBuilderOptions.from_args(args)
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    def report(text: str) -> None:
        # Every option echo goes to the error stream and is flushed at once.
        print(text, file=error_file, flush=True)

    # Show the final option structures for debugging and documentation.
    if args.show_options:
        # TODO: show ifempty-specific options.
        report("input: %s" % str(args.input_file_path))
        report("--column %s" % args.column_name)
        report("--prefix %s" % args.prefix)
        report("--overwrite %s" % str(args.overwrite_column))
        report("--validate %s" % str(args.validate))
        report("--escape-pipes %s" % str(args.escape_pipes))
        report("--quantities-include-numbers %s" % str(args.quantities_include_numbers))
        report("--general-strings %s" % str(args.general_strings))
        report("--remove-prefixed-columns %s" % str(args.remove_prefixed_columns))
        report("--ignore-unselected-types %s" % str(args.ignore_unselected_types))
        report("--retain-unselected-types %s" % str(args.retain_unselected_types))
        if args.type_names is not None:
            report("--types %s" % " ".join(args.type_names))
        if args.without_fields is not None:
            report("--without %s" % " ".join(args.without_fields))
        report("--output-file=%s" % str(args.output_file_path))
        if args.reject_file_path is not None:
            report("--reject-file=%s" % str(args.reject_file_path))
        report("--build-id=%s" % str(args.build_id))
        idbuilder_options.show(out=error_file)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    # An omitted --without means "do without nothing".
    without_fields: typing.List[str]
    if args.without_fields is not None:
        without_fields = args.without_fields
    else:
        without_fields = list()

    imploder: KgtkImplode = KgtkImplode(
        input_file_path=args.input_file_path,
        column_name=args.column_name,
        prefix=args.prefix,
        type_names=args.type_names,
        without_fields=without_fields,
        overwrite_column=args.overwrite_column,
        validate=args.validate,
        escape_pipes=args.escape_pipes,
        quantities_include_numbers=args.quantities_include_numbers,
        general_strings=args.general_strings,
        remove_prefixed_columns=args.remove_prefixed_columns,
        ignore_unselected_types=args.ignore_unselected_types,
        retain_unselected_types=args.retain_unselected_types,
        output_file_path=args.output_file_path,
        reject_file_path=args.reject_file_path,
        build_id=args.build_id,
        idbuilder_options=idbuilder_options,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose,
    )
    imploder.process()
def main():
    """
    Test the KGTK compact processor.

    Parses the command line, optionally echoes the resulting options to the
    error stream, then builds a KgtkCompact from them and runs it.
    """
    parser: ArgumentParser = ArgumentParser()

    # Keyword arguments common to every True/False switch in this driver.
    switch_kwargs = dict(type=optional_bool, nargs='?', const=True)

    parser.add_argument(dest="input_file_path", type=Path, nargs="?", default="-",
                        help="The KGTK file with the input data (default=%(default)s)")

    parser.add_argument("--columns", dest="key_column_names", nargs='+', default=[],
                        help="The key columns to identify records for compaction. "
                        "(default=id for node files, (node1, label, node2, id) for edge files).")

    parser.add_argument("--compact-id", dest="compact_id", default=False,
                        help="Indicate that the ID column in KGTK edge files should be compacted. "
                        "Normally, if the ID column exists, it is not compacted, "
                        "as there are use cases that need to maintain distinct lists of secondary edges for each ID value. (default=%(default)s).",
                        **switch_kwargs)

    parser.add_argument("--presorted", dest="sorted_input", default=False,
                        help="Indicate that the input has been presorted (or at least pregrouped). (default=%(default)s).",
                        **switch_kwargs)

    parser.add_argument("--verify-sort", dest="verify_sort", default=True,
                        help="If the input has been presorted, verify its consistency (disable if only pregrouped). (default=%(default)s).",
                        **switch_kwargs)

    parser.add_argument("-o", "--output-file", dest="output_file_path", type=Path, default="-",
                        help="The KGTK file to write (default=%(default)s).")

    parser.add_argument("--build-id", dest="build_id", default=False,
                        help="Build id values in an id column. (default=%(default)s).",
                        **switch_kwargs)

    KgtkIdBuilderOptions.add_arguments(parser)
    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    error_file: typing.TextIO
    if args.errors_to_stdout:
        error_file = sys.stdout
    else:
        error_file = sys.stderr

    # Build the option structures.
    idbuilder_options: KgtkIdBuilderOptions = KgtkIdBuilderOptions.from_args(args)
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    def report(text: str) -> None:
        # Every option echo goes to the error stream and is flushed at once.
        print(text, file=error_file, flush=True)

    # Show the final option structures for debugging and documentation.
    if args.show_options:
        report("input: %s" % str(args.input_file_path))
        report("--columns %s" % " ".join(args.key_column_names))
        report("--compact-id=%s" % str(args.compact_id))
        report("--presorted=%s" % str(args.sorted_input))
        report("--verify-sort=%s" % str(args.verify_sort))
        report("--output-file=%s" % str(args.output_file_path))
        report("--build-id=%s" % str(args.build_id))
        idbuilder_options.show(out=error_file)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    compactor: KgtkCompact = KgtkCompact(
        input_file_path=args.input_file_path,
        key_column_names=args.key_column_names,
        compact_id=args.compact_id,
        sorted_input=args.sorted_input,
        verify_sort=args.verify_sort,
        output_file_path=args.output_file_path,
        build_id=args.build_id,
        idbuilder_options=idbuilder_options,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose,
    )
    compactor.process()
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Register the command-line arguments for lowering / normalizing edges.

    The defaults of --lower and --normalize, and whether their help text is
    restricted to expert mode, depend on the command name found in the
    shared arguments.

    Args:
        parser (KGTKArgumentParser): the parser to populate.
        parsed_shared_args (Namespace): previously parsed shared arguments;
            `_expert`, `_command` and `_mode` are read here.
    """
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions, KgtkReaderMode
    from kgtk.reshape.kgtkidbuilder import KgtkIdBuilder, KgtkIdBuilderOptions
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalue import KgtkValue
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    _expert: bool = parsed_shared_args._expert
    _command: str = parsed_shared_args._command
    _mode: str = parsed_shared_args._mode

    # This helper function makes it easy to suppress options from
    # the help message.  The options are still there, and initialize
    # what they need to initialize.
    def h(msg: str) -> str:
        return msg if _expert else SUPPRESS

    # Keyword arguments common to every True/False switch below.
    switch = dict(type=optional_bool, nargs='?', const=True, metavar="True|False")

    parser.add_input_file(positional=True)
    parser.add_output_file()
    parser.add_output_file(who="An optional output file for new edges (normalized and/or lowered). " +
                           "If omitted, new edges will go in the main output file.",
                           dest="new_edges_file",
                           options=["--new-edges-file"],
                           metavar="NEW_EDGES_FILE",
                           optional=True)

    parser.add_argument("--columns", "--columns-to-lower", "--columns-to-remove",
                        action="store", type=str, dest="columns_to_lower", nargs='+',
                        help="Columns to lower and remove as a space-separated list. (default=all columns other than key columns)")

    parser.add_argument("--base-columns", dest="base_columns", nargs='*',
                        help=h("Optionally, explicitly list the base column for each column being lowered. " +
                               " --base-columns and --columns-to-lower must have the same number of entries."))

    parser.add_argument("--label-values", action="store", type=str, dest="label_values", nargs='*',
                        help=h("When not empty, a list of label values to use for lowered edges when --base-columns is used, overriding the original column names. (default=%(default)s)"))

    parser.add_argument("--lift-separator", dest="lift_separator",
                        help=h("The separator between the base column and the label value. (default=%(default)s)."),
                        default=KgtkLift.DEFAULT_OUTPUT_LIFTED_COLUMN_SEPARATOR)

    parser.add_argument("--ignore-empty-node1", dest="ignore_empty_node1", default=False,
                        help=h("When True, ignore attempts to lower into a new record with an empty node1 value. (default=%(default)s)"),
                        **switch)

    parser.add_argument("--ignore-empty-node2", dest="ignore_empty_node2", default=False,
                        help=h("When True, ignore attempts to lower into a new record with an empty node2 value. (default=%(default)s)"),
                        **switch)

    parser.add_argument("--add-id", dest="add_id", default=False,
                        help="When True, add an id column to the output (if not already present). (default=%(default)s)",
                        **switch)

    # Under the dedicated lower / normalize-edges commands, --lower and
    # --normalize have opposite defaults and their help is expert-only.
    # Under any other command name both default to True and the help is
    # always shown.
    if _command == LOWER_COMMAND:
        lower_default, normalize_default = True, False
        mode_help = h
    elif _command == NORMALIZE_EDGES_COMMAND:
        lower_default, normalize_default = False, True
        mode_help = h
    else:
        lower_default, normalize_default = True, True

        def mode_help(msg: str) -> str:
            return msg

    parser.add_argument("--lower", dest="lower", default=lower_default,
                        help=mode_help("When True, lower columns that match a lift pattern. (default=%(default)s)"),
                        **switch)

    parser.add_argument("--normalize", dest="normalize", default=normalize_default,
                        help=mode_help("When True, normalize columns that do not match a lift pattern. (default=%(default)s)"),
                        **switch)

    parser.add_argument("--deduplicate-new-edges", dest="deduplicate_new_edges", default=True,
                        help="When True, deduplicate new edges. Not suitable for large files. (default=%(default)s).",
                        **switch)

    KgtkIdBuilderOptions.add_arguments(parser,
                                       default_style=KgtkIdBuilderOptions.CONCAT_NLN_NUM_STYLE,
                                       expert=_expert)
    KgtkReader.add_debug_arguments(parser, expert=_expert)

    if _mode == "NONE":
        reader_default_mode = KgtkReaderMode.NONE
    else:
        reader_default_mode = KgtkReaderMode.EDGE
    KgtkReaderOptions.add_arguments(parser,
                                    mode_options=True,
                                    default_mode=reader_default_mode,
                                    expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)