def main():
    """
    Test the KGTK node file reader.

    Parses the command line, builds the reader and value option structures,
    opens the requested file with NodeReader.open_node_file(), iterates over
    every row, and prints the number of rows read to standard output.
    """
    parser = ArgumentParser()
    # This driver reads the file in NODE mode, so the help text describes a
    # node file (it previously said "edge file").
    parser.add_argument(dest="kgtk_file", help="The KGTK node file to read", type=Path, nargs="?")
    KgtkReader.add_debug_arguments(parser, expert=True)
    KgtkReaderOptions.add_arguments(parser, validate_by_default=True, expert=True)
    KgtkValueOptions.add_arguments(parser, expert=True)

    args = parser.parse_args()

    # Diagnostics go to stdout only when explicitly requested.
    error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr

    # Build the option structures.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args, mode=KgtkReaderMode.NODE)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    if args.show_options:
        reader_options.show(out=error_file)
        value_options.show(out=error_file)
        print("=======", file=error_file, flush=True)

    nr: NodeReader = NodeReader.open_node_file(args.kgtk_file,
                                               error_file=error_file,
                                               options=reader_options,
                                               value_options=value_options,
                                               verbose=args.verbose,
                                               very_verbose=args.very_verbose)

    # Only the number of rows matters here; the row contents are discarded.
    line_count: int = 0
    for _ in nr:
        line_count += 1
    print("Read %d lines" % line_count)
def main():
    """
    Test the language validator.

    Every value supplied on the command line is passed to
    LanguageValidator.validate(), and the outcome is printed to standard
    output in the form "<value>: <result>".
    """
    cli: ArgumentParser = ArgumentParser()
    cli.add_argument(dest="values", help="The values(s) to test", type=str, nargs="+")
    cli.add_argument("-v", "--verbose", dest="verbose", help="Print additional progress messages.", action='store_true')
    KgtkValueOptions.add_arguments(cli)
    parsed: Namespace = cli.parse_args()

    # Build the value parsing option structure.
    validation_options: KgtkValueOptions = KgtkValueOptions.from_args(parsed)

    be_verbose: bool = parsed.verbose
    for candidate in parsed.values:
        is_valid: bool = LanguageValidator.validate(candidate,
                                                    options=validation_options,
                                                    verbose=be_verbose)
        print("%s: %s" % (candidate, str(is_valid)), flush=True)
def main():
    """
    Test the KGTK expand processor.

    Parses the command line, builds the reader and value option structures,
    optionally echoes the effective options to the error file, then
    constructs a KgtkExpand object and calls its process() method.
    """
    parser: ArgumentParser = ArgumentParser()
    parser.add_argument(dest="input_file_path", help="The KGTK file with the input data (default=%(default)s)", type=Path, nargs="?", default="-")

    parser.add_argument("--columns", dest="key_column_names", help="The key columns will not be expanded (default=None).", nargs='+', default=[])

    parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to write (default=%(default)s).", type=Path, default="-")

    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    # Diagnostics go to stdout only when explicitly requested.
    error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr

    # Build the option structures.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    # Show the final option structures for debugging and documentation.
    if args.show_options:
        print("input: %s" % str(args.input_file_path), file=error_file, flush=True)
        print("--columns %s" % " ".join(args.key_column_names), file=error_file, flush=True)
        # Send this line to error_file like the others.  The output file
        # defaults to "-", so writing to stdout here could mix the option
        # dump into the data stream.
        print("--output-file=%s" % str(args.output_file_path), file=error_file, flush=True)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    ex: KgtkExpand = KgtkExpand(
        input_file_path=args.input_file_path,
        key_column_names=args.key_column_names,
        output_file_path=args.output_file_path,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose)

    ex.process()
def main():
    """
    Test the KGTK copy template.

    Parses the command line, builds the reader and value option structures,
    optionally echoes the effective options to the error file, then
    constructs a KgtkCopyTemplate object and calls its process() method.
    """
    parser: ArgumentParser = ArgumentParser()
    parser.add_argument("-i", "--input-file", dest="input_file_path",
                        help="The KGTK input file. (default=%(default)s)", type=Path, default="-")

    parser.add_argument("-o", "--output-file", dest="output_file_path",
                        help="The KGTK output file. (default=%(default)s).", type=Path, default="-")

    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    # Diagnostics go to stdout only when explicitly requested.
    error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr

    # Build the option structures.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    # Show the final option structures for debugging and documentation.
    if args.show_options:
        # This tool takes a single --input-file.  The previous code read an
        # undefined name (input_file_paths) here and raised NameError.
        print("--input-file=%s" % str(args.input_file_path), file=error_file, flush=True)
        print("--output-file=%s" % str(args.output_file_path), file=error_file, flush=True)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    kct: KgtkCopyTemplate = KgtkCopyTemplate(
        input_file_path=args.input_file_path,
        output_file_path=args.output_file_path,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose,
    )

    kct.process()
def main():
    """
    Test the KGTK ntriples importer.

    Parses the command line, builds the ID builder, reader, and value option
    structures, optionally echoes the effective options to the error file,
    then constructs a KgtkNtriples object and calls its process() method.
    """
    parser: ArgumentParser = ArgumentParser()
    parser.add_argument("-i", "--input-files", dest="input_file_paths", nargs='*',
                        help="The file(s) with the input ntriples data. (default=%(default)s)", type=Path, default="-")

    parser.add_argument("-o", "--output-file", dest="output_file_path",
                        help="The KGTK file to write (default=%(default)s).", type=Path, default="-")

    parser.add_argument("--reject-file", dest="reject_file_path",
                        help="The KGTK file into which to write rejected records. (default=%(default)s).",
                        type=Path, default=None)

    parser.add_argument("--namespace-file", dest="namespace_file_path",
                        help="The KGTK file with known namespaces. (default=%(default)s).",
                        type=Path, default=None)

    parser.add_argument("--updated-namespace-file", dest="updated_namespace_file_path",
                        help="An updated KGTK file with known namespaces. (default=%(default)s).",
                        type=Path, default=None)

    KgtkNtriples.add_arguments(parser)
    KgtkIdBuilderOptions.add_arguments(parser)
    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    # Diagnostics go to stdout only when explicitly requested.
    error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr

    # argparse converts a *string* default with the option's type, so when
    # --input-files is omitted the attribute is a single Path("-") rather
    # than a list.  Normalise it so it can always be iterated.
    # NOTE(review): assumes KgtkNtriples expects a list of paths, which is
    # what it already receives whenever -i is given explicitly.
    raw_input_paths = args.input_file_paths
    input_file_paths: typing.List[Path] = [raw_input_paths] if isinstance(raw_input_paths, Path) else raw_input_paths

    # Build the option structures.
    idbuilder_options: KgtkIdBuilderOptions = KgtkIdBuilderOptions.from_args(args)
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    # Show the final option structures for debugging and documentation.
    if args.show_options:
        # The previous code referenced an undefined input_file_paths here and
        # raised NameError; the normalised list above is used instead.
        print("--input-files %s" % " ".join(str(path) for path in input_file_paths), file=error_file, flush=True)
        print("--output-file=%s" % str(args.output_file_path), file=error_file, flush=True)
        if args.reject_file_path is not None:
            print("--reject-file=%s" % str(args.reject_file_path), file=error_file, flush=True)
        if args.namespace_file_path is not None:
            print("--namespace-file=%s" % str(args.namespace_file_path), file=error_file, flush=True)
        if args.updated_namespace_file_path is not None:
            print("--updated-namespace-file=%s" % str(args.updated_namespace_file_path), file=error_file, flush=True)

        print("--namespace-id-prefix %s" % args.namespace_id_prefix, file=error_file, flush=True)
        print("--namespace-id-use-uuid %s" % str(args.namespace_id_use_uuid), file=error_file, flush=True)
        print("--namespace-id-counter %s" % str(args.namespace_id_counter), file=error_file, flush=True)
        print("--namespace-id-zfill %s" % str(args.namespace_id_zfill), file=error_file, flush=True)
        print("--output-only-used-namespaces %s" % str(args.output_only_used_namespaces), file=error_file, flush=True)

        print("--allow-lax-uri %s" % str(args.allow_lax_uri), file=error_file, flush=True)

        print("--local-namespace-prefix %s" % args.local_namespace_prefix, file=error_file, flush=True)
        print("--local-namespace-use-uuid %s" % str(args.local_namespace_use_uuid), file=error_file, flush=True)

        print("--prefix-expansion-label %s" % args.prefix_expansion_label, file=error_file, flush=True)
        print("--structured-value-label %s" % args.structured_value_label, file=error_file, flush=True)
        print("--structured-uri-label %s" % args.structured_uri_label, file=error_file, flush=True)

        print("--newnode-prefix %s" % args.newnode_prefix, file=error_file, flush=True)
        print("--newnode-use-uuid %s" % str(args.newnode_use_uuid), file=error_file, flush=True)
        print("--newnode-counter %s" % str(args.newnode_counter), file=error_file, flush=True)
        print("--newnode-zfill %s" % str(args.newnode_zfill), file=error_file, flush=True)

        print("--build-id=%s" % str(args.build_id), file=error_file, flush=True)

        print("--escape-pipes=%s" % str(args.escape_pipes), file=error_file, flush=True)

        print("--validate=%s" % str(args.validate), file=error_file, flush=True)

        if args.override_uuid is not None:
            print("--override_uuid=%s" % str(args.override_uuid), file=error_file, flush=True)

        idbuilder_options.show(out=error_file)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    kn: KgtkNtriples = KgtkNtriples(
        input_file_paths=input_file_paths,
        output_file_path=args.output_file_path,
        reject_file_path=args.reject_file_path,
        namespace_file_path=args.namespace_file_path,
        updated_namespace_file_path=args.updated_namespace_file_path,
        namespace_id_prefix=args.namespace_id_prefix,
        namespace_id_use_uuid=args.namespace_id_use_uuid,
        namespace_id_counter=args.namespace_id_counter,
        namespace_id_zfill=args.namespace_id_zfill,
        output_only_used_namespaces=args.output_only_used_namespaces,
        newnode_prefix=args.newnode_prefix,
        newnode_use_uuid=args.newnode_use_uuid,
        newnode_counter=args.newnode_counter,
        newnode_zfill=args.newnode_zfill,
        allow_lax_uri=args.allow_lax_uri,
        local_namespace_prefix=args.local_namespace_prefix,
        local_namespace_use_uuid=args.local_namespace_use_uuid,
        prefix_expansion_label=args.prefix_expansion_label,
        structured_value_label=args.structured_value_label,
        structured_uri_label=args.structured_uri_label,
        build_id=args.build_id,
        escape_pipes=args.escape_pipes,
        idbuilder_options=idbuilder_options,
        validate=args.validate,
        override_uuid=args.override_uuid,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose)

    kn.process()
def main():
    """
    Test the KGTK unique processor.

    Parses the command line, builds the reader and value option structures,
    optionally echoes the effective options to the error file, then
    constructs a Unique object and calls its process() method.
    """
    cli: ArgumentParser = ArgumentParser()
    cli.add_argument(dest="input_file_path", help="The KGTK file with the input data", type=Path, nargs="?")

    cli.add_argument("--column", dest="column_name", required=True,
                     help="The column to count unique values (required).")

    cli.add_argument("--empty", dest="empty_value", default="",
                     help="A value to substitute for empty values (default=%(default)s).")

    cli.add_argument("-o", "--output-file", dest="output_file_path", type=Path, default="-",
                     help="The KGTK file to write (default=%(default)s).")

    cli.add_argument("--label", dest="label_value", default="count",
                     help="The output file label column value (default=%(default)s).")

    # TODO: use an enum
    cli.add_argument("--format", dest="output_format", default="edge", choices=["edge", "node"],
                     help="The output file format and mode (default=%(default)s).")

    cli.add_argument("--prefix", dest="prefix", default="",
                     help="The value prefix (default=%(default)s).")

    cli.add_argument("--where", dest="where_column_name", default=None,
                     help="The name of a column for a record selection test. (default=%(default)s).")

    cli.add_argument("--in", dest="where_values", nargs="+", default=None,
                     help="The list of values for a record selection test. (default=%(default)s).")

    KgtkReader.add_debug_arguments(cli)
    KgtkReaderOptions.add_arguments(cli, mode_options=True)
    KgtkValueOptions.add_arguments(cli)

    opts: Namespace = cli.parse_args()

    if opts.errors_to_stdout:
        error_file: typing.TextIO = sys.stdout
    else:
        error_file = sys.stderr

    # Build the option structures.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(opts)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(opts)

    def emit(text: str) -> None:
        # Write one line of the option dump to the error file.
        print(text, file=error_file)

    # Show the final option structures for debugging and documentation.
    if opts.show_options:
        if opts.input_file_path is not None:
            shown_input: str = str(opts.input_file_path)
        else:
            shown_input = "-"
        emit("input: %s" % shown_input)
        emit("--column=%s" % opts.column_name)
        emit("--empty=%s" % opts.empty_value)
        emit("--output-file=%s" % str(opts.output_file_path))
        emit("--label=%s" % opts.label_value)
        emit("--format=%s" % opts.output_format)
        emit("--prefix=%s" % opts.prefix)
        if opts.where_column_name is not None:
            emit("--where=%s" % opts.where_column_name)
        if opts.where_values is not None and len(opts.where_values) > 0:
            emit("--in=%s" % " ".join(opts.where_values))
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    unique_settings = dict(
        input_file_path=opts.input_file_path,
        column_name=opts.column_name,
        output_file_path=opts.output_file_path,
        empty_value=opts.empty_value,
        label_value=opts.label_value,
        output_format=opts.output_format,
        prefix=opts.prefix,
        where_column_name=opts.where_column_name,
        where_values=opts.where_values,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=opts.verbose,
        very_verbose=opts.very_verbose,
    )
    uniq: Unique = Unique(**unique_settings)
    uniq.process()
def main():
    """
    Test the KGTK file joiner.

    Edge files can be joined to edge files.
    Node files can be joined to node files.

    Parses the command line, builds separate reader options for the left and
    right files plus the shared value options, optionally echoes them to the
    error file, then constructs a KgtkJoiner and calls its process() method.

    TODO: Add more KgtkReader parameters, especially mode.
    """
    # Keyword arguments shared by every optional boolean flag below.
    bool_flag = dict(type=optional_bool, nargs='?', const=True, default=False)

    cli = ArgumentParser()
    cli.add_argument(dest="left_file_path", help="The left KGTK file to join", type=Path)
    cli.add_argument(dest="right_file_path", help="The right KGTK file to join", type=Path)
    cli.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to write", type=Path, default=None)

    cli.add_argument("--field-separator", dest="field_separator",
                     help="Separator for multifield keys",
                     default=KgtkJoiner.FIELD_SEPARATOR_DEFAULT)

    cli.add_argument("--join-on-id", dest="join_on_id",
                     help="If both input files are edge files, include the id column in the join (default=%(default)s).",
                     **bool_flag)

    cli.add_argument("--join-on-label", dest="join_on_label",
                     help="If both input files are edge files, include the label column in the join (default=%(default)s).",
                     **bool_flag)

    cli.add_argument("--join-on-node2", dest="join_on_node2",
                     help="If both input files are edge files, include the node2 column in the join (default=%(default)s).",
                     **bool_flag)

    cli.add_argument("--left-prefix", dest="left_prefix",
                     help="An optional prefix applied to left file column names in the output file (default=None).")

    cli.add_argument("--left-join", dest="left_join",
                     help="Perform a left outer join (default=%(default)s).",
                     **bool_flag)

    cli.add_argument("--left-file-join-columns", dest="left_join_columns",
                     help="Left file join columns.", nargs='+')

    cli.add_argument("--right-prefix", "--prefix", dest="right_prefix",
                     help="An optional prefix applied to right file column names in the output file (default=None).")

    cli.add_argument("--right-file-join-columns", dest="right_join_columns",
                     help="Right file join columns.", nargs='+')

    cli.add_argument("--right-join", dest="right_join",
                     help="Perform a right outer join (default=%(default)s).",
                     **bool_flag)

    KgtkReader.add_debug_arguments(cli, expert=True)
    for side in (KgtkJoiner.LEFT, KgtkJoiner.RIGHT):
        KgtkReaderOptions.add_arguments(cli, mode_options=True, who=side, expert=True)
    KgtkValueOptions.add_arguments(cli, expert=True)

    opts = cli.parse_args()

    if opts.errors_to_stdout:
        error_file: typing.TextIO = sys.stdout
    else:
        error_file = sys.stderr

    # Build the option structures.
    left_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(opts, who=KgtkJoiner.LEFT)
    right_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(opts, who=KgtkJoiner.RIGHT)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(opts)

    # Show the final option structures for debugging and documentation.
    if opts.show_options:
        if opts.left_prefix is not None:
            print("--left-prefix=%s" % opts.left_prefix, file=error_file, flush=True)
        if opts.right_prefix is not None:
            print("--right-prefix=%s" % opts.right_prefix, file=error_file, flush=True)
        left_reader_options.show(out=error_file, who=KgtkJoiner.LEFT)
        right_reader_options.show(out=error_file, who=KgtkJoiner.RIGHT)
        value_options.show(out=error_file)

    joiner_settings = dict(
        left_file_path=opts.left_file_path,
        right_file_path=opts.right_file_path,
        output_path=opts.output_file_path,
        left_join=opts.left_join,
        right_join=opts.right_join,
        join_on_id=opts.join_on_id,
        join_on_label=opts.join_on_label,
        join_on_node2=opts.join_on_node2,
        left_join_columns=opts.left_join_columns,
        right_join_columns=opts.right_join_columns,
        left_prefix=opts.left_prefix,
        right_prefix=opts.right_prefix,
        field_separator=opts.field_separator,
        left_reader_options=left_reader_options,
        right_reader_options=right_reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=opts.verbose,
        very_verbose=opts.very_verbose,
    )
    ej: KgtkJoiner = KgtkJoiner(**joiner_settings)
    ej.process()
def main():
    """
    Test the KGTK ID builder.

    Parses the command line, builds the ID builder, reader, and value option
    structures, optionally echoes them to the error file, then opens a
    KgtkReader, creates a KgtkIdBuilder from it, opens a KgtkWriter using the
    builder's column names, and runs the builder over the input.

    The reader and writer are closed even when ID building fails.
    """
    parser: ArgumentParser = ArgumentParser()
    parser.add_argument(dest="input_file_path", help="The KGTK file with the input data (default=%(default)s)", type=Path, nargs="?", default="-")
    parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to write (default=%(default)s).", type=Path, default="-")

    KgtkIdBuilderOptions.add_arguments(parser)
    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    # Diagnostics go to stdout only when explicitly requested.
    error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr

    # Build the option structures.
    idbuilder_options: KgtkIdBuilderOptions = KgtkIdBuilderOptions.from_args(args)
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    # Show the final option structures for debugging and documentation.
    if args.show_options:
        print("input: %s" % str(args.input_file_path), file=error_file, flush=True)
        print("--output-file=%s" % str(args.output_file_path), file=error_file, flush=True)
        idbuilder_options.show(out=error_file)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    # First create the KgtkReader.  It provides parameters used by the ID
    # column builder. Next, create the ID column builder, which provides a
    # possibly revised list of column names for the KgtkWriter.  Last, create
    # the KgtkWriter.

    # Open the input file.
    kr: KgtkReader = KgtkReader.open(args.input_file_path,
                                     error_file=error_file,
                                     options=reader_options,
                                     value_options=value_options,
                                     verbose=args.verbose,
                                     very_verbose=args.very_verbose,
    )
    try:
        # Create the ID builder.
        idb: KgtkIdBuilder = KgtkIdBuilder.new(kr, idbuilder_options)

        # Open the output file.
        ew: KgtkWriter = KgtkWriter.open(idb.column_names,
                                         args.output_file_path,
                                         mode=kr.mode,
                                         require_all_columns=True,
                                         prohibit_extra_columns=True,
                                         fill_missing_columns=False,
                                         gzip_in_parallel=False,
                                         verbose=args.verbose,
                                         very_verbose=args.very_verbose)
        try:
            # Process the input file, building IDs.
            idb.process(kr, ew)
        finally:
            # Close the writer first, matching the original shutdown order.
            ew.close()
    finally:
        kr.close()
def main():
    """
    Test the KGTK explode processor.

    Parses the command line, builds the reader and value option structures,
    optionally echoes the effective options to the error file, then
    constructs a KgtkExplode object and calls its process() method.
    """
    parser: ArgumentParser = ArgumentParser()
    parser.add_argument(dest="input_file_path", help="The KGTK file with the input data. (default=%(default)s)", type=Path, nargs="?", default="-")

    parser.add_argument("--column", dest="column_name", help="The name of the column to explode. (default=%(default)s).", default="node2")

    # --types and --fields are mutually exclusive.  The group object is not an
    # ArgumentParser, so it is deliberately left unannotated.
    fgroup = parser.add_mutually_exclusive_group()

    fgroup.add_argument("--types", dest="type_names", nargs='*',
                        help="The KGTK data types for which fields should be exploded. (default=%(default)s).",
                        choices=KgtkFormat.DataType.choices(),
                        default=KgtkFormat.DataType.choices())

    fgroup.add_argument("--fields", dest="field_names", nargs='*',
                        help="The names of the fields to extract (overrides --types). (default=%(default)s).",
                        choices=KgtkValueFields.FIELD_NAMES)

    parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to write (default=%(default)s).", type=Path, default="-")

    parser.add_argument("--prefix", dest="prefix", help="The prefix for exploded column names. (default=%(default)s).", default="node2;kgtk:")

    parser.add_argument("--overwrite", dest="overwrite_columns",
                        help="Indicate that it is OK to overwrite existing columns. (default=%(default)s).",
                        type=optional_bool, nargs='?', const=True, default=False)

    parser.add_argument("--expand", dest="expand_list",
                        help="Expand the source column if it contains a list, else fail. (default=%(default)s).",
                        type=optional_bool, nargs='?', const=True, default=False)

    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    # Diagnostics go to stdout only when explicitly requested.
    error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr

    # Build the option structures.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    # Show the final option structures for debugging and documentation.
    if args.show_options:
        print("input: %s" % str(args.input_file_path), file=error_file, flush=True)
        print("--column %s" % args.column_name, file=error_file, flush=True)
        print("--prefix %s" % args.prefix, file=error_file, flush=True)
        print("--overwrite %s" % str(args.overwrite_columns), file=error_file, flush=True)
        print("--expand %s" % str(args.expand_list), file=error_file, flush=True)
        if args.field_names is not None:
            print("--fields %s" % " ".join(args.field_names), file=error_file, flush=True)
        if args.type_names is not None:
            print("--types %s" % " ".join(args.type_names), file=error_file, flush=True)
        # Send this line to error_file like the others.  The output file
        # defaults to "-", so writing to stdout here could mix the option
        # dump into the data stream.
        print("--output-file=%s" % str(args.output_file_path), file=error_file, flush=True)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    ex: KgtkExplode = KgtkExplode(
        input_file_path=args.input_file_path,
        column_name=args.column_name,
        prefix=args.prefix,
        field_names=args.field_names,
        type_names=args.type_names,
        overwrite_columns=args.overwrite_columns,
        expand_list=args.expand_list,
        output_file_path=args.output_file_path,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose)

    ex.process()
def main():
    """
    Test the KGTK ifexists processor.

    Parses the command line, builds separate reader options for the input and
    filter files plus the shared value options, optionally echoes them to the
    error file, then constructs a KgtkIfExists and calls its process() method.
    """
    # Keyword arguments shared by every optional boolean flag below.
    bool_flag = dict(type=optional_bool, nargs='?', const=True, default=False)

    cli: ArgumentParser = ArgumentParser()
    cli.add_argument(dest="input_file_path", help="The KGTK file with the input data", type=Path, nargs="?")

    cli.add_argument("--filter-on", dest="filter_file_path", type=Path, required=True,
                     help="The KGTK file with the filter data (required).")

    cli.add_argument("-o", "--output-file", dest="output_file_path", type=Path, default="-",
                     help="The KGTK file to write (default=%(default)s).")

    cli.add_argument("--field-separator", dest="field_separator",
                     help="Separator for multifield keys (default=%(default)s)",
                     default=KgtkIfExists.FIELD_SEPARATOR_DEFAULT)

    cli.add_argument("--invert", dest="invert",
                     help="Invert the test (if not exists) (default=%(default)s).",
                     **bool_flag)

    cli.add_argument("--cache-input", dest="cache_input",
                     help="Cache the input file instead of the filter keys. (default=%(default)s).",
                     **bool_flag)

    cli.add_argument("--preserve-order", dest="preserve_order",
                     help="Preserve record order when cacheing the input file. (default=%(default)s).",
                     **bool_flag)

    cli.add_argument("--input-keys", dest="input_keys", nargs='*',
                     help="The key columns in the input file (default=None).")
    cli.add_argument("--filter-keys", dest="filter_keys", nargs='*',
                     help="The key columns in the filter file (default=None).")

    KgtkReader.add_debug_arguments(cli)
    for role in ("input", "filter"):
        KgtkReaderOptions.add_arguments(cli, mode_options=True, who=role)
    KgtkValueOptions.add_arguments(cli)

    opts: Namespace = cli.parse_args()

    if opts.errors_to_stdout:
        error_file: typing.TextIO = sys.stdout
    else:
        error_file = sys.stderr

    # Build the option structures.
    input_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(opts, who="input")
    filter_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(opts, who="filter")
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(opts)

    def emit(text: str) -> None:
        # Write one line of the option dump to the error file.
        print(text, file=error_file)

    # Show the final option structures for debugging and documentation.
    if opts.show_options:
        if opts.input_file_path is not None:
            shown_input: str = str(opts.input_file_path)
        else:
            shown_input = "-"
        emit("input: %s" % shown_input)
        emit("--filter-on=%s" % str(opts.filter_file_path))
        emit("--output-file=%s" % str(opts.output_file_path))
        emit("--field-separator=%s" % repr(opts.field_separator))
        emit("--invert=%s" % str(opts.invert))
        emit("--cache-input=%s" % str(opts.cache_input))
        emit("--preserve-order=%s" % str(opts.preserve_order))
        if opts.input_keys is not None:
            emit("--input-keys %s" % " ".join(opts.input_keys))
        if opts.filter_keys is not None:
            emit("--filter-keys %s" % " ".join(opts.filter_keys))
        input_reader_options.show(out=error_file, who="input")
        filter_reader_options.show(out=error_file, who="filter")
        value_options.show(out=error_file)

    ifexists_settings = dict(
        input_file_path=opts.input_file_path,
        input_keys=opts.input_keys,
        filter_file_path=opts.filter_file_path,
        filter_keys=opts.filter_keys,
        output_file_path=opts.output_file_path,
        field_separator=opts.field_separator,
        invert=opts.invert,
        cache_input=opts.cache_input,
        preserve_order=opts.preserve_order,
        input_reader_options=input_reader_options,
        filter_reader_options=filter_reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=opts.verbose,
        very_verbose=opts.very_verbose,
    )
    ie: KgtkIfExists = KgtkIfExists(**ifexists_settings)
    ie.process()
def main():
    """
    Test the KGTK compact processor.

    Parses the command line, builds the ID builder, reader, and value option
    structures, optionally echoes the effective options to the error file,
    then constructs a KgtkCompact object and calls its process() method.
    """
    cli: ArgumentParser = ArgumentParser()
    cli.add_argument(dest="input_file_path", type=Path, nargs="?", default="-",
                     help="The KGTK file with the input data (default=%(default)s)")

    cli.add_argument("--columns", dest="key_column_names", nargs='+', default=[],
                     help=("The key columns to identify records for compaction. "
                           "(default=id for node files, (node1, label, node2, id) for edge files)."))

    cli.add_argument("--compact-id", dest="compact_id",
                     type=optional_bool, nargs='?', const=True, default=False,
                     help=("Indicate that the ID column in KGTK edge files should be compacted. "
                           "Normally, if the ID column exists, it is not compacted, "
                           "as there are use cases that need to maintain distinct lists of secondary edges for each ID value. (default=%(default)s)."))

    cli.add_argument("--presorted", dest="sorted_input",
                     type=optional_bool, nargs='?', const=True, default=False,
                     help="Indicate that the input has been presorted (or at least pregrouped). (default=%(default)s).")

    # Unlike the other flags, sort verification defaults to True.
    cli.add_argument("--verify-sort", dest="verify_sort",
                     type=optional_bool, nargs='?', const=True, default=True,
                     help="If the input has been presorted, verify its consistency (disable if only pregrouped). (default=%(default)s).")

    cli.add_argument("-o", "--output-file", dest="output_file_path", type=Path, default="-",
                     help="The KGTK file to write (default=%(default)s).")

    cli.add_argument("--build-id", dest="build_id",
                     type=optional_bool, nargs='?', const=True, default=False,
                     help="Build id values in an id column. (default=%(default)s).")

    KgtkIdBuilderOptions.add_arguments(cli)
    KgtkReader.add_debug_arguments(cli)
    KgtkReaderOptions.add_arguments(cli, mode_options=True)
    KgtkValueOptions.add_arguments(cli)

    opts: Namespace = cli.parse_args()

    if opts.errors_to_stdout:
        error_file: typing.TextIO = sys.stdout
    else:
        error_file = sys.stderr

    # Build the option structures.
    idbuilder_options: KgtkIdBuilderOptions = KgtkIdBuilderOptions.from_args(opts)
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(opts)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(opts)

    def emit(text: str) -> None:
        # Write one line of the option dump to the error file, flushing at once.
        print(text, file=error_file, flush=True)

    # Show the final option structures for debugging and documentation.
    if opts.show_options:
        emit("input: %s" % str(opts.input_file_path))
        emit("--columns %s" % " ".join(opts.key_column_names))
        emit("--compact-id=%s" % str(opts.compact_id))
        emit("--presorted=%s" % str(opts.sorted_input))
        emit("--verify-sort=%s" % str(opts.verify_sort))
        emit("--output-file=%s" % str(opts.output_file_path))
        emit("--build-id=%s" % str(opts.build_id))
        idbuilder_options.show(out=error_file)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    compact_settings = dict(
        input_file_path=opts.input_file_path,
        key_column_names=opts.key_column_names,
        compact_id=opts.compact_id,
        sorted_input=opts.sorted_input,
        verify_sort=opts.verify_sort,
        output_file_path=opts.output_file_path,
        build_id=opts.build_id,
        idbuilder_options=idbuilder_options,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=opts.verbose,
        very_verbose=opts.very_verbose,
    )
    kc: KgtkCompact = KgtkCompact(**compact_settings)
    kc.process()
def main():
    """
    Test the KGTK ifempty processor.

    Parses the command line, builds the reader and value option structures,
    optionally echoes them to the error file, then constructs a KgtkIfEmpty
    object and calls its process() method.
    """
    # Keyword arguments shared by every optional boolean flag below.
    bool_flag = dict(type=optional_bool, nargs='?', const=True, default=False)

    cli: ArgumentParser = ArgumentParser()
    cli.add_argument(dest="input_file_path", help="The KGTK file with the input data", type=Path, nargs="?")

    cli.add_argument("--columns", dest="filter_column_names", nargs='+', required=True,
                     help="The columns to filter on (default=None).")

    cli.add_argument("--count", dest="only_count",
                     help="Only count the records, do not copy them. (default=%(default)s).",
                     **bool_flag)

    cli.add_argument("-o", "--output-file", dest="output_file_path", type=Path, default="-",
                     help="The KGTK file to write (default=%(default)s).")

    cli.add_argument("--all", dest="all_are",
                     help="False: Test if any are, True: test if all are (default=%(default)s).",
                     **bool_flag)

    cli.add_argument("--not-empty", dest="notempty",
                     help="False: test if empty, True: test if not empty (default=%(default)s).",
                     **bool_flag)

    KgtkReader.add_debug_arguments(cli)
    KgtkReaderOptions.add_arguments(cli, mode_options=True)
    KgtkValueOptions.add_arguments(cli)

    opts: Namespace = cli.parse_args()

    if opts.errors_to_stdout:
        error_file: typing.TextIO = sys.stdout
    else:
        error_file = sys.stderr

    # Build the option structures.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(opts)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(opts)

    # Show the final option structures for debugging and documentation.
    if opts.show_options:
        # TODO: show ifempty-specific options.
        for option_set in (reader_options, value_options):
            option_set.show(out=error_file)

    ifempty_settings = dict(
        input_file_path=opts.input_file_path,
        filter_column_names=opts.filter_column_names,
        output_file_path=opts.output_file_path,
        all_are=opts.all_are,
        notempty=opts.notempty,
        only_count=opts.only_count,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=opts.verbose,
        very_verbose=opts.very_verbose,
    )
    ie: KgtkIfEmpty = KgtkIfEmpty(**ifempty_settings)
    ie.process()
def main():
    """
    Test the KGTK unreify-values processor (KgtkUnreifyValues).

    Parses the command line, builds the reader and value option structures,
    optionally echoes the effective options to the error stream, and then
    runs a KgtkUnreifyValues instance.

    The trigger/value/label options and the allow_* flags read from args are
    not declared here; presumably they are registered by
    KgtkUnreifyValues.add_arguments. Likewise errors_to_stdout, show_options,
    verbose and very_verbose presumably come from
    KgtkReader.add_debug_arguments (confirm against those helpers).
    """
    parser: ArgumentParser = ArgumentParser()

    parser.add_argument("-i", "--input-file", dest="input_file_path",
                        help="The KGTK input file. (default=%(default)s)", type=Path, default="-")

    parser.add_argument("-o", "--output-file", dest="output_file_path",
                        help="The KGTK output file. (default=%(default)s).", type=Path, default="-")

    parser.add_argument("--reified-file", dest="reified_file_path",
                        help="A KGTK output file that will contain only the reified values. (default=%(default)s).",
                        type=Path, default=None)

    parser.add_argument("--unreified-file", dest="unreified_file_path",
                        help="A KGTK output file that will contain only the unreified values. (default=%(default)s).",
                        type=Path, default=None)

    parser.add_argument("--uninvolved-file", dest="uninvolved_file_path",
                        help="A KGTK output file that will contain only the uninvolved input records. (default=%(default)s).",
                        type=Path, default=None)

    parser.add_argument("--output-format", dest="output_format",
                        help="The file format (default=kgtk)", type=str,
                        choices=KgtkWriter.OUTPUT_FORMAT_CHOICES)

    KgtkUnreifyValues.add_arguments(parser)
    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=False, expert=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr

    # Build the option structures.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    # Show the final option structures for debugging and documentation.
    if args.show_options:
        # This tool takes a single input file (args.input_file_path); there is
        # no list of input paths to join.
        print("--input-file=%s" % str(args.input_file_path), file=error_file, flush=True)
        print("--output-file=%s" % str(args.output_file_path), file=error_file, flush=True)
        if args.reified_file_path is not None:
            print("--reified-file=%s" % str(args.reified_file_path), file=error_file, flush=True)
        if args.unreified_file_path is not None:
            print("--unreified-file=%s" % str(args.unreified_file_path), file=error_file, flush=True)
        if args.uninvolved_file_path is not None:
            print("--uninvolved-file=%s" % str(args.uninvolved_file_path), file=error_file, flush=True)
        if args.output_format is not None:
            print("--output-format=%s" % args.output_format, file=error_file, flush=True)
        if args.trigger_label_value is not None:
            print("--trigger-label=%s" % args.trigger_label_value, file=error_file, flush=True)
        if args.trigger_node2_value is not None:
            print("--trigger-node2=%s" % args.trigger_node2_value, file=error_file, flush=True)
        if args.value_label_value is not None:
            print("--value-label=%s" % args.value_label_value, file=error_file, flush=True)
        if args.old_label_value is not None:
            print("--old-label=%s" % args.old_label_value, file=error_file, flush=True)
        if args.new_label_value is not None:
            print("--new-label=%s" % args.new_label_value, file=error_file, flush=True)
        print("--allow-multiple-values=%s" % str(args.allow_multiple_values), file=error_file, flush=True)
        print("--allow-extra-columns=%s" % str(args.allow_extra_columns), file=error_file, flush=True)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    kuv: KgtkUnreifyValues = KgtkUnreifyValues(
        input_file_path=args.input_file_path,
        output_file_path=args.output_file_path,
        reified_file_path=args.reified_file_path,
        unreified_file_path=args.unreified_file_path,
        uninvolved_file_path=args.uninvolved_file_path,

        trigger_label_value=args.trigger_label_value,
        trigger_node2_value=args.trigger_node2_value,
        value_label_value=args.value_label_value,
        old_label_value=args.old_label_value,
        new_label_value=args.new_label_value,

        allow_multiple_values=args.allow_multiple_values,
        allow_extra_columns=args.allow_extra_columns,

        reader_options=reader_options,
        value_options=value_options,
        output_format=args.output_format,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose,
    )

    kuv.process()
def main():
    """
    Test the KGTK lift processor.

    Parses the command line, builds the reader and value option structures,
    optionally echoes the effective options to the error stream, and then
    runs a KgtkLift instance.

    The attributes args.errors_to_stdout, args.show_options, args.verbose and
    args.very_verbose are not declared here; presumably they are registered
    by KgtkReader.add_debug_arguments (confirm against that helper).
    """
    parser: ArgumentParser = ArgumentParser()

    # nargs="?" is required for argparse to honour the default on a
    # positional argument; without it the "-" default could never be used.
    parser.add_argument(dest="input_file_path", help="The KGTK file with the input data",
                        type=Path, nargs="?", default="-")

    parser.add_argument("--label-file", dest="label_file_path",
                        help="A KGTK file with label records (default=%(default)s).", type=Path, default=None)

    parser.add_argument("--node1-name", dest="node1_column_name",
                        help="The name of the node1 column. (default=node1 or alias).", default=None)

    parser.add_argument("--label-name", dest="label_column_name",
                        help="The name of the label column. (default=label).", default=None)

    parser.add_argument("--node2-name", dest="node2_column_name",
                        help="The name of the node2 column. (default=node2 or alias).", default=None)

    parser.add_argument("--label-value", dest="label_column_value",
                        help="The value in the label column. (default=%(default)s).", default="label")

    parser.add_argument("--lift-suffix", dest="lifted_column_suffix",
                        help="The suffix used for newly created columns. (default=%(default)s).", default=";label")

    parser.add_argument("--columns-to-lift", dest="lift_column_names",
                        help="The columns to lift. (default=[node1, label, node2]).", nargs='*')

    parser.add_argument("-o", "--output-file", dest="output_file_path",
                        help="The KGTK file to write (default=%(default)s).", type=Path, default="-")

    parser.add_argument("--remove-label-records", dest="remove_label_records",
                        help="If true, remove label records from the output. (default=%(default)s).",
                        type=optional_bool, nargs='?', const=True, default=True)

    parser.add_argument("--sort-lifted-labels", dest="sort_lifted_labels",
                        help="If true, sort lifted labels with lists. (default=%(default)s).",
                        type=optional_bool, nargs='?', const=True, default=True)

    parser.add_argument("--suppress-duplicate-labels", dest="suppress_duplicate_labels",
                        help="If true, suppress duplicate values in lifted labels with lists (implies sorting). (default=%(default)s).",
                        type=optional_bool, nargs='?', const=True, default=True)

    parser.add_argument("--suppress-empty-columns", dest="suppress_empty_columns",
                        help="If true, do not create new columns that would be empty. (default=%(default)s).",
                        type=optional_bool, nargs='?', const=True, default=False)

    parser.add_argument("--ok-if-no-labels", dest="ok_if_no_labels",
                        help="If true, do not abort if no labels were found. (default=%(default)s).",
                        type=optional_bool, nargs='?', const=True, default=False)

    parser.add_argument("--input-file-is-presorted", dest="input_is_presorted",
                        help="If true, the input file is presorted on the column for which values are to be lifted. (default=%(default)s).",
                        type=optional_bool, nargs='?', const=True, default=False)

    parser.add_argument("--label-file-is-presorted", dest="labels_are_presorted",
                        help="If true, the label file is presorted on the node1 column. (default=%(default)s).",
                        type=optional_bool, nargs='?', const=True, default=False)

    KgtkReader.add_debug_arguments(parser)
    # TODO: separate reader options for the label file.
    KgtkReaderOptions.add_arguments(parser, mode_options=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr

    # Build the option structures.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    # Show the final option structures for debugging and documentation.
    # Every line goes to error_file so the echo never mixes with data that
    # may be written to standard output.
    if args.show_options:
        print("input: %s" % str(args.input_file_path), file=error_file, flush=True)
        if args.label_file_path is not None:
            print("--label-file=%s" % str(args.label_file_path), file=error_file, flush=True)
        if args.node1_column_name is not None:
            print("--node1-name=%s" % args.node1_column_name, file=error_file, flush=True)
        if args.label_column_name is not None:
            print("--label-name=%s" % args.label_column_name, file=error_file, flush=True)
        if args.node2_column_name is not None:
            print("--node2-name=%s" % args.node2_column_name, file=error_file, flush=True)
        print("--label-value=%s" % args.label_column_value, file=error_file, flush=True)
        print("--lift-suffix=%s" % args.lifted_column_suffix, file=error_file, flush=True)
        if args.lift_column_names is not None and len(args.lift_column_names) > 0:
            print("--columns-to-lift %s" % " ".join(args.lift_column_names), file=error_file, flush=True)
        print("--output-file=%s" % str(args.output_file_path), file=error_file, flush=True)
        print("--remove-label-records=%s" % str(args.remove_label_records), file=error_file, flush=True)
        print("--sort-lifted-labels=%s" % str(args.sort_lifted_labels), file=error_file, flush=True)
        print("--suppress-duplicate-labels=%s" % str(args.suppress_duplicate_labels), file=error_file, flush=True)
        print("--suppress-empty-columns=%s" % str(args.suppress_empty_columns), file=error_file, flush=True)
        print("--ok-if-no-labels=%s" % str(args.ok_if_no_labels), file=error_file, flush=True)
        print("--input-file-is-presorted=%s" % str(args.input_is_presorted), file=error_file, flush=True)
        print("--label-file-is-presorted=%s" % str(args.labels_are_presorted), file=error_file, flush=True)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    kl: KgtkLift = KgtkLift(
        input_file_path=args.input_file_path,
        label_file_path=args.label_file_path,
        node1_column_name=args.node1_column_name,
        label_column_name=args.label_column_name,
        node2_column_name=args.node2_column_name,
        label_column_value=args.label_column_value,
        lifted_column_suffix=args.lifted_column_suffix,
        lift_column_names=args.lift_column_names,
        output_file_path=args.output_file_path,
        remove_label_records=args.remove_label_records,
        sort_lifted_labels=args.sort_lifted_labels,
        suppress_duplicate_labels=args.suppress_duplicate_labels,
        suppress_empty_columns=args.suppress_empty_columns,
        ok_if_no_labels=args.ok_if_no_labels,
        input_is_presorted=args.input_is_presorted,
        labels_are_presorted=args.labels_are_presorted,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose)

    kl.process()
def main():
    """
    Test the KGTK compact processor.

    Declares the command-line options, builds the ID builder, reader and
    value option structures from the parsed arguments, optionally echoes the
    effective options to the error stream, and runs a KgtkCompact instance.

    TODO: Support the list output file.
    """
    # Keyword arguments shared by every optional-boolean flag below.
    bool_flag = dict(type=optional_bool, nargs='?', const=True)

    parser: ArgumentParser = ArgumentParser()
    parser.add_argument(
        dest="input_file_path",
        type=Path,
        nargs="?",
        default="-",
        help="The KGTK file with the input data (default=%(default)s)")

    parser.add_argument(
        "--columns",
        dest="key_column_names",
        nargs='+',
        default=[],
        help="The key columns to identify records for compaction. "
        "(default=id for node files, (node1, label, node2, id) for edge files).")

    parser.add_argument(
        "--keep-first",
        dest="keep_first_names",
        nargs='+',
        default=[],
        help="If compaction results in a list of values for any column on this list, keep only the first value after sorting. "
        "(default=none).")

    parser.add_argument(
        "--compact-id",
        dest="compact_id",
        default=False,
        help="Indicate that the ID column in KGTK edge files should be compacted. "
        "Normally, if the ID column exists, it is not compacted, "
        "as there are use cases that need to maintain distinct lists of secondary edges for each ID value. (default=%(default)s).",
        **bool_flag)

    parser.add_argument(
        "--deduplicate",
        dest="deduplicate",
        default=False,
        metavar="True|False",
        help="Treat all columns as key columns, overriding --columns and --compact-id. "
        "This will remove completely duplicate records without compacting any new lists. "
        "(default=%(default)s).",
        **bool_flag)

    parser.add_argument(
        "--presorted",
        dest="sorted_input",
        default=False,
        help="Indicate that the input has been presorted (or at least pregrouped). (default=%(default)s).",
        **bool_flag)

    parser.add_argument(
        "--verify-sort",
        dest="verify_sort",
        default=True,
        help="If the input has been presorted, verify its consistency (disable if only pregrouped). (default=%(default)s).",
        **bool_flag)

    parser.add_argument(
        "--lists-in-input",
        dest="lists_in_input",
        default=True,
        help="Assume that the input file may contain lists (disable when certain it does not). (default=%(default)s).",
        **bool_flag)

    parser.add_argument(
        "--report-lists",
        dest="report_lists",
        default=False,
        help="When True, report records with lists to the error output. (default=%(default)s).",
        **bool_flag)

    parser.add_argument(
        "--exclude-lists",
        dest="exclude_lists",
        default=False,
        help="When True, exclude records with lists from the output. (default=%(default)s).",
        **bool_flag)

    parser.add_argument(
        "--output-only-lists",
        dest="output_only_lists",
        default=False,
        help="When True, output only records containing lists. (default=%(default)s).",
        **bool_flag)

    parser.add_argument(
        "-o", "--output-file",
        dest="output_file_path",
        type=Path,
        default="-",
        help="The KGTK file to write (default=%(default)s).")

    parser.add_argument(
        "--build-id",
        dest="build_id",
        default=False,
        help="Build id values in an id column. (default=%(default)s).",
        **bool_flag)

    KgtkIdBuilderOptions.add_arguments(parser)
    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    # Diagnostics go to stdout only when explicitly requested.
    error_file: typing.TextIO = sys.stderr
    if args.errors_to_stdout:
        error_file = sys.stdout

    # Build the option structures.
    idbuilder_options: KgtkIdBuilderOptions = KgtkIdBuilderOptions.from_args(args)
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    def emit(text: str) -> None:
        # Write one option line to the error stream, flushing immediately.
        print(text, file=error_file, flush=True)

    # Show the final option structures for debugging and documentation.
    if args.show_options:
        emit("input: %s" % str(args.input_file_path))
        emit("--output-file=%s" % str(args.output_file_path))
        emit("--columns %s" % " ".join(args.key_column_names))
        emit("--keep-first %s" % " ".join(args.keep_first_names))
        emit("--compact-id=%s" % str(args.compact_id))
        emit("--deduplicate=%s" % str(args.deduplicate))
        emit("--presorted=%s" % str(args.sorted_input))
        emit("--verify-sort=%s" % str(args.verify_sort))
        emit("--lists-in-input=%s" % str(args.lists_in_input))
        emit("--report-lists=%s" % str(args.report_lists))
        emit("--exclude-lists=%s" % str(args.exclude_lists))
        emit("--output-only-lists=%s" % str(args.output_only_lists))
        emit("--build-id=%s" % str(args.build_id))
        idbuilder_options.show(out=error_file)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    compactor: KgtkCompact = KgtkCompact(
        input_file_path=args.input_file_path,
        key_column_names=args.key_column_names,
        keep_first_names=args.keep_first_names,
        compact_id=args.compact_id,
        deduplicate=args.deduplicate,
        sorted_input=args.sorted_input,
        verify_sort=args.verify_sort,
        lists_in_input=args.lists_in_input,
        report_lists=args.report_lists,
        exclude_lists=args.exclude_lists,
        output_only_lists=args.output_only_lists,
        output_file_path=args.output_file_path,
        build_id=args.build_id,
        idbuilder_options=idbuilder_options,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose)

    compactor.process()
def main():
    """
    Test the KGTK file concatenator.

    Parses the command line, builds the reader and value option structures,
    optionally echoes the effective options to the error stream, and then
    runs a KgtkCat instance over the input files.

    The attributes args.errors_to_stdout, args.show_options, args.verbose and
    args.very_verbose are not declared here; presumably they are registered
    by KgtkReader.add_debug_arguments (confirm against that helper).
    """
    parser = ArgumentParser()
    parser.add_argument(dest="input_file_paths", help="The KGTK files to concatenate", type=Path, nargs='+')

    parser.add_argument("-o", "--output-file", dest="output_file_path",
                        help="The KGTK file to write (default=%(default)s)", type=Path, default="-")

    parser.add_argument("--output-format", dest="output_format",
                        help="The file format (default=kgtk)", type=str,
                        choices=KgtkWriter.OUTPUT_FORMAT_CHOICES)

    parser.add_argument("--output-columns", dest="output_column_names",
                        help="Rename all output columns. (default=%(default)s)", type=str, nargs='+')

    parser.add_argument("--old-columns", dest="old_column_names",
                        help="Rename selected output columns: old names. (default=%(default)s)",
                        type=str, nargs='+')

    parser.add_argument("--new-columns", dest="new_column_names",
                        help="Rename selected output columns: new names. (default=%(default)s)",
                        type=str, nargs='+')

    KgtkReader.add_debug_arguments(parser, expert=True)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=True)
    KgtkValueOptions.add_arguments(parser, expert=True)

    args = parser.parse_args()

    error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr

    # Build the option structures.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    # Show the final option structures for debugging and documentation.
    if args.show_options:
        # Paths are converted to strings so they can be joined for display.
        input_files: typing.List[str] = [str(input_file) for input_file in args.input_file_paths]
        print("input: %s" % " ".join(input_files), file=error_file, flush=True)
        print("--output-file=%s" % args.output_file_path, file=error_file, flush=True)
        if args.output_format is not None:
            print("--output-format=%s" % args.output_format, file=error_file, flush=True)
        if args.output_column_names is not None:
            print("--output-columns=%s" % " ".join(args.output_column_names), file=error_file, flush=True)
        if args.old_column_names is not None:
            print("--old-columns=%s" % " ".join(args.old_column_names), file=error_file, flush=True)
        if args.new_column_names is not None:
            print("--new-columns=%s" % " ".join(args.new_column_names), file=error_file, flush=True)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    kc: KgtkCat = KgtkCat(
        input_file_paths=args.input_file_paths,
        output_path=args.output_file_path,
        output_format=args.output_format,
        output_column_names=args.output_column_names,
        old_column_names=args.old_column_names,
        new_column_names=args.new_column_names,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose)

    kc.process()
def main():
    """
    Test the KGTK implode processor.

    Declares the command-line options, builds the ID builder, reader and
    value option structures from the parsed arguments, optionally echoes the
    effective options to the error stream, and runs a KgtkImplode instance.
    """
    # Keyword arguments shared by every optional-boolean flag below.
    bool_flag = dict(type=optional_bool, nargs='?', const=True)

    parser: ArgumentParser = ArgumentParser()
    parser.add_argument(
        dest="input_file_path",
        type=Path,
        nargs="?",
        default="-",
        help="The KGTK file with the input data. (default=%(default)s)")

    parser.add_argument(
        "--column",
        dest="column_name",
        default="node2",
        help="The name of the column to explode. (default=%(default)s).")

    parser.add_argument(
        "--types",
        dest="type_names",
        nargs='*',
        choices=KgtkFormat.DataType.choices(),
        default=KgtkFormat.DataType.choices(),
        help="The KGTK data types for which fields should be imploded. (default=%(default)s).")

    parser.add_argument(
        "--without",
        dest="without_fields",
        nargs='*',
        choices=KgtkValueFields.OPTIONAL_DEFAULT_FIELD_NAMES,
        default=None,
        help="The KGTK fields to do without. (default=%(default)s).")

    parser.add_argument(
        "-o", "--output-file",
        dest="output_file_path",
        type=Path,
        default="-",
        help="The KGTK file to write (default=%(default)s).")

    parser.add_argument(
        "--prefix",
        dest="prefix",
        default="node2;kgtk:",
        help="The prefix for exploded column names. (default=%(default)s).")

    parser.add_argument(
        "--overwrite",
        dest="overwrite_column",
        default=True,
        help="Indicate that it is OK to overwrite an existing imploded column. (default=%(default)s).",
        **bool_flag)

    parser.add_argument(
        "--validate",
        dest="validate",
        default=True,
        help="Validate imploded values. (default=%(default)s).",
        **bool_flag)

    parser.add_argument(
        "--escape-pipes",
        dest="escape_pipes",
        default=False,
        help="When true, pipe characters (|) need to be escaped (\\|) per KGTK file format. (default=%(default)s).",
        **bool_flag)

    parser.add_argument(
        "--quantities-include-numbers",
        dest="quantities_include_numbers",
        default=True,
        help="When true, numbers are acceptable quantities. (default=%(default)s).",
        **bool_flag)

    parser.add_argument(
        "--general-strings",
        dest="general_strings",
        default=True,
        help="When true, strings may include language qualified strings. (default=%(default)s).",
        **bool_flag)

    parser.add_argument(
        "--remove-prefixed-columns",
        dest="remove_prefixed_columns",
        default=False,
        help="When true, remove all columns beginning with the prefix from the output file. (default=%(default)s).",
        **bool_flag)

    parser.add_argument(
        "--ignore-unselected-types",
        dest="ignore_unselected_types",
        default=True,
        help="When true, input records with valid but unselected data types will be passed through to output. (default=%(default)s).",
        **bool_flag)

    parser.add_argument(
        "--retain-unselected-types",
        dest="retain_unselected_types",
        default=True,
        help="When true, input records with valid but unselected data types will be retain existing data on output. (default=%(default)s).",
        **bool_flag)

    parser.add_argument(
        "--build-id",
        dest="build_id",
        default=False,
        help="Build id values in an id column. (default=%(default)s).",
        **bool_flag)

    parser.add_argument(
        "--reject-file",
        dest="reject_file_path",
        type=Path,
        default=None,
        help="The KGTK file into which to write rejected records (default=%(default)s).")

    KgtkIdBuilderOptions.add_arguments(parser)
    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    # Diagnostics go to stdout only when explicitly requested.
    error_file: typing.TextIO = sys.stderr
    if args.errors_to_stdout:
        error_file = sys.stdout

    # Build the option structures.
    idbuilder_options: KgtkIdBuilderOptions = KgtkIdBuilderOptions.from_args(args)
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    def emit(text: str) -> None:
        # Write one option line to the error stream, flushing immediately.
        print(text, file=error_file, flush=True)

    # Show the final option structures for debugging and documentation.
    if args.show_options:
        emit("input: %s" % str(args.input_file_path))
        emit("--column %s" % args.column_name)
        emit("--prefix %s" % args.prefix)
        emit("--overwrite %s" % str(args.overwrite_column))
        emit("--validate %s" % str(args.validate))
        emit("--escape-pipes %s" % str(args.escape_pipes))
        emit("--quantities-include-numbers %s" % str(args.quantities_include_numbers))
        emit("--general-strings %s" % str(args.general_strings))
        emit("--remove-prefixed-columns %s" % str(args.remove_prefixed_columns))
        emit("--ignore-unselected-types %s" % str(args.ignore_unselected_types))
        emit("--retain-unselected-types %s" % str(args.retain_unselected_types))
        if args.type_names is not None:
            emit("--types %s" % " ".join(args.type_names))
        if args.without_fields is not None:
            emit("--without %s" % " ".join(args.without_fields))
        emit("--output-file=%s" % str(args.output_file_path))
        if args.reject_file_path is not None:
            emit("--reject-file=%s" % str(args.reject_file_path))
        emit("--build-id=%s" % str(args.build_id))
        idbuilder_options.show(out=error_file)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    # The processor is always handed a list, even when --without was omitted.
    without_fields: typing.List[str] = args.without_fields
    if without_fields is None:
        without_fields = []

    imploder: KgtkImplode = KgtkImplode(
        input_file_path=args.input_file_path,
        column_name=args.column_name,
        prefix=args.prefix,
        type_names=args.type_names,
        without_fields=without_fields,
        overwrite_column=args.overwrite_column,
        validate=args.validate,
        escape_pipes=args.escape_pipes,
        quantities_include_numbers=args.quantities_include_numbers,
        general_strings=args.general_strings,
        remove_prefixed_columns=args.remove_prefixed_columns,
        ignore_unselected_types=args.ignore_unselected_types,
        retain_unselected_types=args.retain_unselected_types,
        output_file_path=args.output_file_path,
        reject_file_path=args.reject_file_path,
        build_id=args.build_id,
        idbuilder_options=idbuilder_options,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose)

    imploder.process()
def main():
    """
    Test the KGTK sort buffer (KgtkSortBufferTest).

    Parses the command line, builds the reader and value option structures,
    optionally echoes the effective options to the error stream, and then
    runs a KgtkSortBufferTest instance.

    The attributes args.errors_to_stdout, args.show_options, args.verbose and
    args.very_verbose are not declared here; presumably they are registered
    by KgtkReader.add_debug_arguments (confirm against that helper).
    """
    parser: ArgumentParser = ArgumentParser()

    parser.add_argument("-i", "--input-file", dest="input_file_path",
                        help="The KGTK input file. (default=%(default)s)", type=Path, default="-")

    parser.add_argument("-o", "--output-file", dest="output_file_path",
                        help="The KGTK output file. (default=%(default)s).", type=Path, default="-")

    parser.add_argument("--keygen", dest="keygen",
                        help="The KGTK key generation procedure. (default=%(default)s).",
                        type=str, default="node1")

    parser.add_argument("--group-sort", dest="group_sort",
                        help="If true, use the grouped sort and buffer. (default=%(default)s).",
                        type=optional_bool, nargs='?', const=True, default=False)

    parser.add_argument("--group-iterate", dest="group_iterate",
                        help="If true, use the grouped iteration. (default=%(default)s).",
                        type=optional_bool, nargs='?', const=True, default=False)

    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr

    # Build the option structures.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    # Show the final option structures for debugging and documentation.
    if args.show_options:
        # This tool takes a single input file (args.input_file_path); there is
        # no list of input paths to join.
        print("--input-file=%s" % str(args.input_file_path), file=error_file, flush=True)
        print("--output-file=%s" % str(args.output_file_path), file=error_file, flush=True)
        print("--keygen=%s" % str(args.keygen), file=error_file, flush=True)
        print("--group-sort=%s" % str(args.group_sort), file=error_file, flush=True)
        print("--group-iterate=%s" % str(args.group_iterate), file=error_file, flush=True)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    ksbt: KgtkSortBufferTest = KgtkSortBufferTest(
        input_file_path=args.input_file_path,
        output_file_path=args.output_file_path,
        keygen=args.keygen,
        group_sort=args.group_sort,
        group_iterate=args.group_iterate,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose,
    )

    ksbt.process()
def main():
    """
    Test the KGTK lift processor.

    Parse the command line, optionally echo the effective options to the
    error file, then build a KgtkLift from the parsed arguments and run
    its process() method.
    """
    parser: ArgumentParser = ArgumentParser()

    # nargs="?" makes the positional optional; without it argparse requires
    # the argument and the declared default of "-" could never be used.
    parser.add_argument(dest="input_file_path",
                        help="The KGTK file with the input data",
                        type=Path, nargs="?", default="-")

    parser.add_argument("-o", "--output-file", dest="output_file_path",
                        help="The KGTK file to write (default=%(default)s).",
                        type=Path, default="-")

    parser.add_argument("--label-file", dest="label_file_path",
                        help="An optional KGTK file with label records (default=%(default)s).",
                        type=Path, default=None)

    parser.add_argument("--input-select-column", "--input-label-column",
                        dest="input_select_column_name",
                        help="If input record selection is enabled by --input-select-value, " +
                        "the name of a column that determines which records received lifted values. " +
                        "The default is the 'label' column or its alias.",
                        default=None)

    parser.add_argument("--input-select-value", "--input-label-value", "--target-label-value",
                        dest="input_select_column_value",
                        help="The value in the input select column that identifies a record to receive lifted values. " +
                        "The default is not to perform input record selection, " +
                        "and all input records except label records may receive lifted values. ",
                        default=None)

    parser.add_argument("--columns-to-lift", dest="input_lifting_column_names",
                        help="The columns for which matching labels are to be lifted. " +
                        "The default is [node1, label, node2] or their aliases.",
                        nargs='*')

    parser.add_argument("--lift-suffix", dest="output_lifted_column_suffix",
                        help="The suffix used for newly created output columns. (default=%(default)s).",
                        default=KgtkLift.DEFAULT_OUTPUT_LIFTED_COLUMN_SUFFIX)

    parser.add_argument("--update-select-value", "--target-new-label-value",
                        dest="output_select_column_value",
                        help="A new value for the select (label) column for records that received lifted values. " +
                        "The default is not to update the select(label) column.",
                        default=None)

    parser.add_argument("--label-select-column", "--label-name",
                        dest="label_select_column_name",
                        help="The name of the column that contains a special value that identifies label records. " +
                        "The default is 'label' or its alias.",
                        default=None)

    parser.add_argument("-p", "--label-select-value", "--label-value", "--property",
                        dest="label_select_column_value",
                        help="The special value in the label select column that identifies a label record. " +
                        "(default=%(default)s).",
                        default=KgtkLift.DEFAULT_LABEL_SELECT_COLUMN_VALUE)

    parser.add_argument("--label-match-column", "--node1-name",
                        dest="label_match_column_name",
                        help="The name of the column in the label records that contains the value " +
                        "that matches the value in a column being lifted in the input records. " +
                        "The default is 'node1' or its alias.",
                        default=None)

    parser.add_argument("--label-value-column", "--node2-name", "--lift-from",
                        dest="label_value_column_name",
                        help="The name of the column in the label record that contains the value " +
                        "to be lifted into the input record that is receiving lifted values. " +
                        "The default is 'node2' or its alias.",
                        default=None)

    # The boolean flags below may be given bare (const=True) or with an
    # explicit value that is converted by optional_bool.
    parser.add_argument("--remove-label-records", dest="remove_label_records",
                        help="If true, remove label records from the output. (default=%(default)s).",
                        type=optional_bool, nargs='?', const=True, default=True)

    parser.add_argument("--sort-lifted-labels", dest="sort_lifted_labels",
                        help="If true, sort lifted labels with lists. (default=%(default)s).",
                        type=optional_bool, nargs='?', const=True, default=True)

    parser.add_argument("--suppress-duplicate-labels", dest="suppress_duplicate_labels",
                        help="If true, suppress duplicate values in lifted labels with lists (implies sorting). (default=%(default)s).",
                        type=optional_bool, nargs='?', const=True, default=True)

    parser.add_argument("--suppress-empty-columns", dest="suppress_empty_columns",
                        help="If true, do not create new columns that would be empty. (default=%(default)s).",
                        type=optional_bool, nargs='?', const=True, default=False)

    parser.add_argument("--ok-if-no-labels", dest="ok_if_no_labels",
                        help="If true, do not abort if no labels were found. (default=%(default)s).",
                        type=optional_bool, nargs='?', const=True, default=False)

    parser.add_argument("--prefilter-labels", dest="prefilter_labels",
                        help="If true, read the input file before reading the label file. (default=%(default)s).",
                        type=optional_bool, nargs='?', const=True, default=False)

    parser.add_argument("--input-file-is-presorted", dest="input_is_presorted",
                        help="If true, the input file is presorted on the column for which values are to be lifted. (default=%(default)s).",
                        type=optional_bool, nargs='?', const=True, default=False)

    parser.add_argument("--label-file-is-presorted", dest="labels_are_presorted",
                        help="If true, the label file is presorted on the node1 column. (default=%(default)s).",
                        type=optional_bool, nargs='?', const=True, default=False)

    # NOTE(review): errors_to_stdout, show_options, verbose and very_verbose
    # are presumably registered by KgtkReader.add_debug_arguments -- confirm.
    KgtkReader.add_debug_arguments(parser)
    # TODO: separate reader options for the label file.
    KgtkReaderOptions.add_arguments(parser, mode_options=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr

    # Build the option structures.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    # Show the final option structures for debugging and documentation.
    # Every line goes to error_file so the echo is never mixed into data
    # written to standard output, and each label matches a real option name.
    if args.show_options:
        print("input: %s" % str(args.input_file_path), file=error_file, flush=True)
        if args.label_file_path is not None:
            print("--label-file=%s" % str(args.label_file_path), file=error_file, flush=True)
        print("--output-file=%s" % str(args.output_file_path), file=error_file, flush=True)
        if args.input_select_column_name is not None:
            print("--input-select-column=%s" % args.input_select_column_name, file=error_file, flush=True)
        if args.input_select_column_value is not None:
            print("--input-select-value=%s" % args.input_select_column_value, file=error_file, flush=True)
        # --columns-to-lift is None when absent and may be an empty list
        # when given without values; echo it only when it names columns.
        if args.input_lifting_column_names:
            print("--columns-to-lift %s" % " ".join(args.input_lifting_column_names), file=error_file, flush=True)
        print("--lift-suffix=%s" % args.output_lifted_column_suffix, file=error_file, flush=True)
        if args.output_select_column_value is not None:
            print("--update-select-value=%s" % args.output_select_column_value, file=error_file, flush=True)
        if args.label_select_column_name is not None:
            print("--label-select-column=%s" % args.label_select_column_name, file=error_file, flush=True)
        if args.label_select_column_value is not None:
            print("--label-select-value=%s" % args.label_select_column_value, file=error_file, flush=True)
        if args.label_match_column_name is not None:
            print("--label-match-column=%s" % args.label_match_column_name, file=error_file, flush=True)
        if args.label_value_column_name is not None:
            print("--label-value-column=%s" % args.label_value_column_name, file=error_file, flush=True)
        print("--remove-label-records=%s" % str(args.remove_label_records), file=error_file, flush=True)
        print("--sort-lifted-labels=%s" % str(args.sort_lifted_labels), file=error_file, flush=True)
        print("--suppress-duplicate-labels=%s" % str(args.suppress_duplicate_labels), file=error_file, flush=True)
        print("--suppress-empty-columns=%s" % str(args.suppress_empty_columns), file=error_file, flush=True)
        print("--ok-if-no-labels=%s" % str(args.ok_if_no_labels), file=error_file, flush=True)
        print("--prefilter-labels=%s" % str(args.prefilter_labels), file=error_file, flush=True)
        print("--input-file-is-presorted=%s" % str(args.input_is_presorted), file=error_file, flush=True)
        print("--label-file-is-presorted=%s" % str(args.labels_are_presorted), file=error_file, flush=True)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    kl: KgtkLift = KgtkLift(
        input_file_path=args.input_file_path,
        label_file_path=args.label_file_path,
        output_file_path=args.output_file_path,
        input_select_column_name=args.input_select_column_name,
        input_select_column_value=args.input_select_column_value,
        input_lifting_column_names=args.input_lifting_column_names,
        output_select_column_value=args.output_select_column_value,
        output_lifted_column_suffix=args.output_lifted_column_suffix,
        label_select_column_name=args.label_select_column_name,
        label_select_column_value=args.label_select_column_value,
        label_match_column_name=args.label_match_column_name,
        label_value_column_name=args.label_value_column_name,
        remove_label_records=args.remove_label_records,
        sort_lifted_labels=args.sort_lifted_labels,
        suppress_duplicate_labels=args.suppress_duplicate_labels,
        suppress_empty_columns=args.suppress_empty_columns,
        ok_if_no_labels=args.ok_if_no_labels,
        prefilter_labels=args.prefilter_labels,
        input_is_presorted=args.input_is_presorted,
        labels_are_presorted=args.labels_are_presorted,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose)

    kl.process()