def main():
    """
    Test the KGTK node file reader.

    Parses the command line, builds the reader and value option structures
    from the parsed arguments, opens the requested file with NodeReader,
    iterates over every row, and prints the number of rows read to stdout.
    """
    parser = ArgumentParser()
    # Help text corrected: this driver opens the file with NodeReader in
    # NODE mode, so the positional argument is a node file (it used to say
    # "edge file").
    parser.add_argument(dest="kgtk_file", help="The KGTK node file to read", type=Path, nargs="?")
    KgtkReader.add_debug_arguments(parser, expert=True)
    KgtkReaderOptions.add_arguments(parser, validate_by_default=True, expert=True)
    KgtkValueOptions.add_arguments(parser, expert=True)

    args = parser.parse_args()

    # Diagnostics go to stderr unless the caller asked for stdout.
    error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr

    # Build the option structures.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args, mode=KgtkReaderMode.NODE)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    if args.show_options:
        reader_options.show(out=error_file)
        value_options.show(out=error_file)
        print("=======", file=error_file, flush=True)

    nr: NodeReader = NodeReader.open_node_file(args.kgtk_file,
                                               error_file=error_file,
                                               options=reader_options,
                                               value_options=value_options,
                                               verbose=args.verbose,
                                               very_verbose=args.very_verbose)

    # Only the number of rows matters here; the row contents are discarded.
    line_count: int = sum(1 for _ in nr)
    print("Read %d lines" % line_count)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Declare the command-line arguments for this command.

    Adds a positional input file, an output file, the `--output-format`
    option (its help text is shown only in expert mode), and the shared
    reader and value option groups.

    Args:
        parser: the KGTK argument parser to populate.
        parsed_shared_args: the parsed shared arguments; only `_expert` is read.
    """
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions, KgtkReaderMode
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    expert_mode: bool = parsed_shared_args._expert

    def h(msg: str) -> str:
        # Expert-only help: outside expert mode the option still exists and
        # is initialized, but its help text is suppressed.
        return msg if expert_mode else SUPPRESS

    parser.add_input_file(who="The KGTK file to convert to an HTML table.", positional=True)
    parser.add_output_file(who="The GitHub markdown file to write.")

    parser.add_argument(
        "--output-format",
        dest="output_format",
        type=str,
        default="table",
        help=h("The file format (default=%(default)s)"),
    )

    KgtkReader.add_debug_arguments(parser, expert=expert_mode)
    KgtkReaderOptions.add_arguments(
        parser,
        mode_options=True,
        default_mode=KgtkReaderMode.NONE,
        expert=expert_mode,
    )
    KgtkValueOptions.add_arguments(parser, expert=expert_mode)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Declare the command-line arguments for this command.

    Adds a positional input file, an output file, the required `--columns`
    list, the `--count` and `--all` boolean switches, and the shared reader
    and value option groups.

    Args:
        parser: the KGTK argument parser to populate.
        parsed_shared_args: the parsed shared arguments; only `_expert` is read.
    """
    expert_mode: bool = parsed_shared_args._expert

    # Both boolean switches take an optional True|False value, mean True
    # when given without a value, and default to False.
    bool_switch = dict(metavar="True|False", type=optional_bool, nargs='?', const=True, default=False)

    parser.add_input_file(positional=True)
    parser.add_output_file()

    parser.add_argument(
        "--columns",
        dest="filter_column_names",
        nargs='+',
        required=True,
        help="The columns in the file being filtered (Required).",
    )
    parser.add_argument(
        "--count",
        dest="only_count",
        help="Only count the records, do not copy them. (default=%(default)s).",
        **bool_switch,
    )
    parser.add_argument(
        "--all",
        dest="all_are",
        help="False: Test if any are not empty, True: test if all are not empty (default=%(default)s).",
        **bool_switch,
    )

    KgtkReader.add_debug_arguments(parser, expert=expert_mode)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=expert_mode)
    KgtkValueOptions.add_arguments(parser, expert=expert_mode)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Declare the command-line arguments for validating KGTK files.

    Adds the positional list of files to validate, the `--header-only`
    boolean switch, and the shared reader and value option groups (the
    reader options are added with validate_by_default=True).

    Args:
        parser: the KGTK argument parser to populate.
        parsed_shared_args: the parsed shared arguments; only `_expert` is read.
    """
    _expert: bool = parsed_shared_args._expert

    parser.add_argument(
        "kgtk_files",
        nargs="*",
        help="The KGTK file(s) to validate. May be omitted or '-' for stdin.",
        type=Path)

    # Help text corrected: it previously read "Process the only the header".
    parser.add_argument(
        "--header-only",
        dest="header_only",
        help="Process only the header of the input file (default=%(default)s).",
        type=optional_bool,
        nargs='?',
        const=True,
        default=False)

    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, validate_by_default=True, expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Declare the command-line arguments for this command.

    Adds a positional input file, an output file, the `--columns` list of
    key column names (empty list when not given), and the shared reader and
    value option groups.

    Args:
        parser: the KGTK argument parser to populate.
        parsed_shared_args: the parsed shared arguments; only `_expert` is read.
    """
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    expert_mode: bool = parsed_shared_args._expert

    parser.add_input_file(positional=True)
    parser.add_output_file()

    key_columns_help: str = (
        "The key columns will not be expanded. They will be repeated on each output record. "
        "(default=id for node files, (node1, label, node2) for edge files)."
    )
    parser.add_argument(
        "--columns",
        dest="key_column_names",
        nargs='+',
        default=[],
        help=key_columns_help,
    )

    KgtkReader.add_debug_arguments(parser, expert=expert_mode)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=expert_mode)
    KgtkValueOptions.add_arguments(parser, expert=expert_mode)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Declare the command-line arguments for this command.

    Adds a positional input file, a positional output file, an optional
    `--reject-file` output, and the shared reader and value option groups
    (the reader options are added with validate_by_default=True).

    Args:
        parser: the KGTK argument parser to populate.
        parsed_shared_args: the parsed shared arguments; only `_expert` is read.
    """
    # These modules are imported locally, inside the function.
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    expert_mode: bool = parsed_shared_args._expert

    parser.add_input_file(positional=True)
    parser.add_output_file(positional=True)
    parser.add_output_file(
        who="Reject file",
        dest="reject_file",
        options=["--reject-file"],
        metavar="REJECT_FILE",
        optional=True,
    )

    KgtkReader.add_debug_arguments(parser, expert=expert_mode)
    KgtkReaderOptions.add_arguments(
        parser,
        mode_options=True,
        validate_by_default=True,
        expert=expert_mode,
    )
    KgtkValueOptions.add_arguments(parser, expert=expert_mode)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Declare the command-line arguments for this command.

    Adds an optional positional input path, the `-o/--output-file` path, the
    ID builder options, and the shared reader and value option groups.

    The nested help-suppression helper `h` that used to be defined here was
    never called, so it has been removed.

    Args:
        parser: the KGTK argument parser to populate.
        parsed_shared_args: the parsed shared arguments; only `_expert` is read.
    """
    _expert: bool = parsed_shared_args._expert

    # argparse applies `type` to string defaults, so "-" is converted to a
    # Path just like a value given on the command line.
    parser.add_argument(
        "input_kgtk_file",
        nargs="?",
        type=Path,
        default="-",
        help="The KGTK file to filter. May be omitted or '-' for stdin (default=%(default)s).")

    parser.add_argument(
        "-o", "--output-file",
        dest="output_kgtk_file",
        help="The KGTK file to write (default=%(default)s).",
        type=Path,
        default="-")

    KgtkIdBuilderOptions.add_arguments(parser, expert=True)  # Show all the options.
    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Declare the command-line arguments for this command.

    Adds a positional input file, an output file, the required
    `-c/--columns` list, three boolean switches that control how the column
    list is parsed, and the shared reader and value option groups.

    Args:
        parser: the KGTK argument parser to populate.
        parsed_shared_args: the parsed shared arguments; only `_expert` is read.
    """
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions, KgtkReaderMode
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    expert_mode: bool = parsed_shared_args._expert

    # Shared shape of the boolean switches: optional value, True when the
    # flag is given without one.
    bool_switch = dict(type=optional_bool, nargs='?', const=True)

    parser.add_input_file(positional=True)
    parser.add_output_file()

    parser.add_argument(
        '-c', "--columns",
        action="store",
        type=str,
        dest="columns",
        nargs='+',
        required=True,
        help="Columns to remove as a comma- or space-separated strings, e.g., id,docid or id docid",
    )
    parser.add_argument(
        "--split-on-commas",
        dest="split_on_commas",
        default=True,
        help="Parse the list of columns, splitting on commas. (default=%(default)s).",
        **bool_switch,
    )
    parser.add_argument(
        "--split-on-spaces",
        dest="split_on_spaces",
        default=False,
        help="Parse the list of columns, splitting on spaces. (default=%(default)s).",
        **bool_switch,
    )
    parser.add_argument(
        "--strip-spaces",
        dest="strip_spaces",
        default=True,
        help="Parse the list of columns, stripping whitespace. (default=%(default)s).",
        **bool_switch,
    )

    KgtkReader.add_debug_arguments(parser, expert=expert_mode)
    KgtkReaderOptions.add_arguments(
        parser,
        mode_options=True,
        default_mode=KgtkReaderMode.NONE,
        expert=expert_mode,
    )
    KgtkValueOptions.add_arguments(parser, expert=expert_mode)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Declare the command-line arguments for this command.

    Adds a required positional input file, an optional output file for the
    graph dump, the `--undirected` boolean switch, and the shared reader and
    value option groups.

    Args:
        parser: the KGTK argument parser to populate.
        parsed_shared_args: the parsed shared arguments; only `_expert` is read.
    """
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    _expert: bool = parsed_shared_args._expert

    parser.add_input_file(positional=True, optional=False)

    # Help text corrected: it previously read "dump the graph too".
    parser.add_output_file(
        who="Graph tool file to dump the graph to - if empty, it will not be saved.",
        optional=True)

    parser.add_argument(
        '--undirected',
        dest="undirected",
        help="When True, the graph is undirected. (default=%(default)s)",
        type=optional_bool,
        nargs='?',
        const=True,
        default=False,
        metavar="True|False")

    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Declare the command-line arguments for this command.

    Adds a positional input file, an output file, the ID builder options,
    and the shared reader and value option groups.

    The nested help-suppression helper `h` and the `KgtkIdBuilder` import
    were never referenced in this function, so both have been removed.

    Args:
        parser: the KGTK argument parser to populate.
        parsed_shared_args: the parsed shared arguments; only `_expert` is read.
    """
    # import modules locally
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.reshape.kgtkidbuilder import KgtkIdBuilderOptions
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    _expert: bool = parsed_shared_args._expert

    parser.add_input_file(positional=True)
    parser.add_output_file()

    KgtkIdBuilderOptions.add_arguments(parser, expert=True)  # Show all the options.
    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Declare the command-line arguments for this command.

    Adds an optional positional input path, the `-o/--output-file` path, the
    `--output-format` option (its help text is shown only in expert mode),
    and the shared reader and value option groups.

    Args:
        parser: the KGTK argument parser to populate.
        parsed_shared_args: the parsed shared arguments; only `_expert` is read.
    """
    expert_mode: bool = parsed_shared_args._expert

    def h(msg: str) -> str:
        # Expert-only help: outside expert mode the option still exists and
        # is initialized, but its help text is suppressed.
        return msg if expert_mode else SUPPRESS

    parser.add_argument(
        "input_file_path",
        nargs='?',
        type=Path,
        default=Path("-"),
        help="The KGTK file to convert to a GitHub markdown table.",
    )
    parser.add_argument(
        "-o", "--output-file",
        dest="output_file_path",
        type=Path,
        default="-",
        help="The KGTK file to write (default=%(default)s).",
    )
    parser.add_argument(
        "--output-format",
        dest="output_format",
        type=str,
        default="md",
        help=h("The file format (default=%(default)s)"),
    )

    KgtkReader.add_debug_arguments(parser, expert=expert_mode)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=expert_mode)
    KgtkValueOptions.add_arguments(parser, expert=expert_mode)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Declare the command-line arguments for this command.

    Adds an input file, the required `--output-path` folder, the
    `--file-prefix`, `--split-by-qnode`, `--gzipped-output` and `--lines`
    options, and the shared reader and value option groups.

    Args:
        parser: the KGTK argument parser to populate.
        parsed_shared_args: the parsed shared arguments; only `_expert` is read.
    """
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions

    expert_mode: bool = parsed_shared_args._expert

    parser.add_input_file()

    parser.add_argument(
        '--output-path',
        action='store',
        type=str,
        dest="output_path",
        required=True,
        help="Path of an existing folder where the split files will be written",
    )
    parser.add_argument(
        '--file-prefix',
        action='store',
        type=str,
        default='split_',
        dest='file_prefix',
        required=False,
        help="file name prefix, will be appended to output file names before a number",
    )

    split_by_qnode_help: str = (
        "If specified, all edges for a qnode will be written to a separate file, "
        "qnode will be added to the file name. WARNING: If there are millions of Qnodes, "
        "this option will create millions of file. Default [FALSE]"
    )
    parser.add_argument(
        '--split-by-qnode',
        default=False,
        action="store_true",
        dest='split_by_qnode',
        help=split_by_qnode_help,
    )
    parser.add_argument(
        '--gzipped-output',
        default=False,
        action="store_true",
        dest='gzipped_output',
        help="If specified, the output split files will be gzipped. Default FALSE",
    )

    lines_help: str = (
        "number of lines in each split file. The actual number of lines will exceed this number, "
        "since Qnode boundaries are preserved."
    )
    parser.add_argument(
        '--lines',
        action='store',
        dest='lines',
        type=int,
        default=1000000,
        required=False,
        help=lines_help,
    )

    KgtkReader.add_debug_arguments(parser, expert=expert_mode)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=expert_mode)
    KgtkValueOptions.add_arguments(parser, expert=expert_mode)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Declare the command-line arguments for this command.

    Adds the positional input path, the `-o/--output-file` path, the
    `--output-format` option (its help text is shown only in expert mode),
    the three column-renaming lists, and the shared reader and value option
    groups.

    Args:
        parser: the KGTK argument parser to populate.
        parsed_shared_args: the parsed shared arguments; only `_expert` is read.
    """
    _expert: bool = parsed_shared_args._expert

    # This helper function makes it easy to suppress options from
    # The help message.  The options are still there, and initialize
    # what they need to initialize.
    def h(msg: str) -> str:
        return msg if _expert else SUPPRESS

    # Bug fix: a positional argument is required unless nargs="?" (or "*")
    # is given, so the declared default of "-" could never take effect.
    # Adding nargs="?" makes the positional optional and the default real;
    # command lines that pass the path explicitly behave as before.
    parser.add_argument(
        "input_file_path",
        nargs="?",
        help="The KGTK input file. (default=%(default)s).",
        type=Path,
        default="-")

    parser.add_argument(
        "-o", "--output-file",
        dest="output_file_path",
        help="The KGTK file to write (default=%(default)s).",
        type=Path,
        default="-")

    parser.add_argument(
        "--output-format",
        dest="output_format",
        help=h("The file format (default=kgtk)"),
        type=str)

    parser.add_argument(
        "--output-columns",
        dest="output_column_names",
        metavar="NEW_COLUMN_NAME",
        help="The list of new column names when renaming all columns.",
        type=str,
        nargs='+')

    parser.add_argument(
        "--old-columns",
        dest="old_column_names",
        metavar="OLD_COLUMN_NAME",
        help="The list of old column names for selective renaming.",
        type=str,
        nargs='+')

    parser.add_argument(
        "--new-columns",
        dest="new_column_names",
        metavar="NEW_COLUMN_NAME",
        help="The list of new column names for selective renaming.",
        type=str,
        nargs='+')

    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Declare the command-line arguments for this command.

    Adds a positional input file, an output file, the `--no-header`,
    `--properties`, `--undirected` and `--strong` options, the reader debug
    arguments, and two sets of reader options (the second one registered
    with who="input" and defaults=False).

    Args:
        parser: the KGTK argument parser to populate.
        parsed_shared_args: the parsed shared arguments; only `_expert` is read.
    """
    expert_mode: bool = parsed_shared_args._expert

    parser.add_input_file(positional=True, who="The KGTK file to find connected components in.")
    parser.add_output_file()

    parser.add_argument(
        "--no-header",
        action="store_true",
        dest="no_header",
        help="Specify if the input file does not have a header, default FALSE",
    )

    properties_help: str = (
        ' A comma separated list of properties to traverse while finding connected components, '
        'by default all properties will be considered'
    )
    parser.add_argument(
        "--properties",
        action="store",
        type=str,
        dest="properties",
        default='',
        help=properties_help,
    )
    parser.add_argument(
        '--undirected',
        action='store_true',
        dest="undirected",
        help="Specify if the input graph is undirected, default FALSE",
    )
    parser.add_argument(
        '--strong',
        action='store_true',
        dest="strong",
        help="Treat graph as directed or not, independent of its actual directionality.",
    )

    # CMR: The following options aren't used.  Is the intent to support them, or
    # was this cut-and-paste overkill?
    KgtkReader.add_debug_arguments(parser, expert=expert_mode)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=expert_mode)
    KgtkReaderOptions.add_arguments(
        parser,
        mode_options=True,
        who="input",
        expert=expert_mode,
        defaults=False,
    )
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Declare the command-line arguments for validating KGTK files.

    Adds the positional list of input files (also reachable through
    `-i/--input-files`), the `--header-only` boolean switch, and the shared
    reader and value option groups (the reader options are added with
    validate_by_default=True).

    Args:
        parser: the KGTK argument parser to populate.
        parsed_shared_args: the parsed shared arguments; only `_expert` is read.
    """
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    _expert: bool = parsed_shared_args._expert

    parser.add_input_file(who="The KGTK file(s) to validate.",
                          dest="input_files",
                          options=["-i", "--input-files"],
                          allow_list=True,
                          positional=True)

    # Help text corrected: it previously read "Process the only the header".
    parser.add_argument(
        "--header-only",
        dest="header_only",
        help="Process only the header of the input file (default=%(default)s).",
        type=optional_bool,
        nargs='?',
        const=True,
        default=False)

    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, validate_by_default=True, expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Declare the command-line arguments for this command.

    Adds an input file, an output file, the `--output-format` option (its
    help text is shown only in expert mode), the required `-c/--columns`,
    `--into` and `--do` options, the optional `--format` string, and the
    shared reader and value option groups.

    Args:
        parser: the KGTK argument parser to populate.
        parsed_shared_args: the parsed shared arguments; only `_expert` is read.
    """
    # These modules are imported locally, inside the function.
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    expert_mode: bool = parsed_shared_args._expert

    def h(msg: str) -> str:
        # Expert-only help: outside expert mode the option still exists and
        # is initialized, but its help text is suppressed.
        return msg if expert_mode else SUPPRESS

    parser.add_input_file()
    parser.add_output_file()

    parser.add_argument(
        "--output-format",
        dest="output_format",
        type=str,
        help=h("The file format (default=kgtk)"),
    )

    columns_help: str = (
        "The list of source column names, optionally containing '..' for column ranges "
        "and '...' for column names not explicitly mentioned."
    )
    parser.add_argument(
        '-c', "--columns",
        dest="column_names",
        required=True,
        nargs='+',
        metavar="COLUMN_NAME",
        help=columns_help,
    )
    parser.add_argument(
        "--into",
        dest="into_column_name",
        required=True,
        help="The name of the column to receive the result of the calculation.",
    )
    parser.add_argument(
        "--do",
        dest="operation",
        required=True,
        choices=["percentage"],
        help="The name of the operation.",
    )
    parser.add_argument(
        "--format",
        dest="format_string",
        help="The format string for the calculation.",
    )

    KgtkReader.add_debug_arguments(parser, expert=expert_mode)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=expert_mode)
    KgtkValueOptions.add_arguments(parser, expert=expert_mode)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Declare the command-line arguments for this command.

    Adds a positional input file, an output file, an optional
    `--reject-file` output, the required `--columns` list, the `--count`
    and `--all` boolean switches, and the shared reader and value option
    groups. The default reader mode is looked up from the shared `_mode`
    argument.

    Args:
        parser: the KGTK argument parser to populate.
        parsed_shared_args: the parsed shared arguments; `_expert` and
            `_mode` are read.
    """
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions, KgtkReaderMode
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    expert_mode: bool = parsed_shared_args._expert

    # Both boolean switches take an optional True|False value, mean True
    # when given without a value, and default to False.
    bool_switch = dict(metavar="True|False", type=optional_bool, nargs='?', const=True, default=False)

    parser.add_input_file(positional=True)
    parser.add_output_file()
    parser.add_output_file(
        who="The KGTK file for input records that fail the filter.",
        dest="reject_file",
        options=["--reject-file"],
        metavar="REJECT_FILE",
        optional=True,
    )

    parser.add_argument(
        "--columns",
        dest="filter_column_names",
        nargs='+',
        required=True,
        help="The columns in the file being filtered (Required).",
    )
    parser.add_argument(
        "--count",
        dest="only_count",
        help="Only count the records, do not copy them. (default=%(default)s).",
        **bool_switch,
    )
    parser.add_argument(
        "--all",
        dest="all_are",
        help="False: Test if any are empty, True: test if all are empty (default=%(default)s).",
        **bool_switch,
    )

    KgtkReader.add_debug_arguments(parser, expert=expert_mode)
    KgtkReaderOptions.add_arguments(
        parser,
        mode_options=True,
        default_mode=KgtkReaderMode[parsed_shared_args._mode],
        expert=expert_mode,
    )
    KgtkValueOptions.add_arguments(parser, expert=expert_mode)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Declare the command-line arguments for this command.

    Adds the positional list of input files (also reachable through
    `-i/--input-files`), an output file, the `--output-format` choice, the
    three column-renaming lists (their help text is shown only in expert
    mode), and the shared reader and value option groups.

    Args:
        parser: the KGTK argument parser to populate.
        parsed_shared_args: the parsed shared arguments; only `_expert` is read.
    """
    expert_mode: bool = parsed_shared_args._expert

    def h(msg: str) -> str:
        # Expert-only help: outside expert mode the option still exists and
        # is initialized, but its help text is suppressed.
        return msg if expert_mode else SUPPRESS

    parser.add_input_file(
        who="KGTK input files",
        dest="input_files",
        options=["-i", "--input-files"],
        allow_list=True,
        positional=True,
    )
    parser.add_output_file()

    parser.add_argument(
        "--output-format",
        dest="output_format",
        type=str,
        choices=KgtkWriter.OUTPUT_FORMAT_CHOICES,
        help="The file format (default=kgtk)",
    )

    # The three renaming options share everything but their flag, dest,
    # metavar and help text.
    renaming_options = (
        ("--output-columns", "output_column_names", "NEW_COLUMN_NAME",
         "The list of new column names when renaming all columns."),
        ("--old-columns", "old_column_names", "OLD_COLUMN_NAME",
         "The list of old column names for selective renaming."),
        ("--new-columns", "new_column_names", "NEW_COLUMN_NAME",
         "The list of new column names for selective renaming."),
    )
    for flag, dest_name, meta_name, help_text in renaming_options:
        parser.add_argument(
            flag,
            dest=dest_name,
            metavar=meta_name,
            help=h(help_text),
            type=str,
            nargs='+',
        )

    KgtkReader.add_debug_arguments(parser, expert=expert_mode)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=expert_mode)
    KgtkValueOptions.add_arguments(parser, expert=expert_mode)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Declare the command-line arguments for this command.

    Adds an input file, an output file, the `--output-format` option (its
    help text is shown only in expert mode), the required `-c/--columns`
    list, the `--trim` boolean switch, and the shared reader and value
    option groups. The `--trim` default is True only when the invoking
    command equals SELECT_COLUMNS_COMMAND.

    Args:
        parser: the KGTK argument parser to populate.
        parsed_shared_args: the parsed shared arguments; `_expert` and
            `_command` are read.
    """
    # These modules are imported locally, inside the function.
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    expert_mode: bool = parsed_shared_args._expert
    command_name: str = parsed_shared_args._command

    def h(msg: str) -> str:
        # Expert-only help: outside expert mode the option still exists and
        # is initialized, but its help text is suppressed.
        return msg if expert_mode else SUPPRESS

    parser.add_input_file()
    parser.add_output_file()

    parser.add_argument(
        "--output-format",
        dest="output_format",
        type=str,
        help=h("The file format (default=kgtk)"),
    )
    parser.add_argument(
        '-c', "--columns",
        dest="column_names",
        required=True,
        nargs='+',
        metavar="COLUMN_NAME",
        help="The list of reordered column names, optionally containing '...' for column names not explicitly mentioned.",
    )

    # The trimming default depends on which command name was used.
    trim_by_default: bool = command_name == SELECT_COLUMNS_COMMAND
    parser.add_argument(
        "--trim",
        dest="omit_remaining_columns",
        metavar="True|False",
        type=optional_bool,
        nargs='?',
        const=True,
        default=trim_by_default,
        help="If true, omit unmentioned columns. (default=%(default)s).",
    )

    KgtkReader.add_debug_arguments(parser, expert=expert_mode)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=expert_mode)
    KgtkValueOptions.add_arguments(parser, expert=expert_mode)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Declare the command-line arguments for importing ntriples files.

    Adds the list of ntriples input files, the output file, the optional
    reject, namespace and updated-namespace files, the KgtkNtriples and ID
    builder options, and the shared reader and value option groups. The
    default reader mode is looked up from the shared `_mode` argument.

    The nested help-suppression helper `h` and the `KgtkIdBuilder` import
    were never referenced in this function, so both have been removed.

    Args:
        parser: the KGTK argument parser to populate.
        parsed_shared_args: the parsed shared arguments; `_expert` and
            `_mode` are read.
    """
    from kgtk.imports.kgtkntriples import KgtkNtriples
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions, KgtkReaderMode
    from kgtk.reshape.kgtkidbuilder import KgtkIdBuilderOptions
    # NOTE(review): optional_bool is not referenced in this function; the
    # import is kept as-is -- confirm it is safe to drop.
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    _expert: bool = parsed_shared_args._expert

    parser.add_input_file(who="The ntriples file(s) to import.", allow_list=True)

    parser.add_output_file()

    parser.add_output_file(who="The ntriples output file for records that are rejected.",
                           dest="reject_file",
                           options=["--reject-file"],
                           metavar="REJECT_FILE",
                           optional=True)

    parser.add_input_file(who="The KGTK input file with known namespaces.",
                          dest="namespace_file",
                          options=["--namespace-file"],
                          metavar="NAMESPACE_FILE",
                          optional=True)

    parser.add_output_file(who="The KGTK output file with updated namespaces.",
                           dest="updated_namespace_file",
                           options=["--updated-namespace-file"],
                           metavar="NAMESPACE_FILE",
                           optional=True)

    KgtkNtriples.add_arguments(parser)
    KgtkIdBuilderOptions.add_arguments(parser)
    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser,
                                    mode_options=True,
                                    default_mode=KgtkReaderMode[parsed_shared_args._mode],
                                    expert=_expert)
    KgtkValueOptions.add_arguments(parser)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Declare the command-line arguments for this command.

    Adds an input file, an output file, the `-l/--lines` boolean switch
    (whose default depends on the command name used), the `--count-property`
    option (its help text is shown only in expert mode), and the shared
    reader and value option groups.

    Args:
        parser: the KGTK argument parser to populate.
        parsed_shared_args: the parsed shared arguments; `_expert` and
            `_command` are read.
    """
    from kgtk.exceptions import KGTKException
    from kgtk.lift.kgtklift import KgtkLift
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    expert_mode: bool = parsed_shared_args._expert
    command_name: str = parsed_shared_args._command

    def h(msg: str) -> str:
        # Expert-only help: outside expert mode the option still exists and
        # is initialized, but its help text is suppressed.
        return msg if expert_mode else SUPPRESS

    parser.add_input_file()
    parser.add_output_file()

    # The default value for this option depends upon the command used.
    if command_name == WC_COMMAND:
        count_records_default = DEFAULT_COUNT_RECORDS_WC
    else:
        count_records_default = DEFAULT_COUNT_RECORDS

    lines_help: str = (
        "If true, count records and print a single number to stdout. "
        "If false, count non-empty values per column and produce a simple KGTK output file. (default=%(default)s)."
    )
    parser.add_argument(
        '-l', '--lines',
        dest="count_records",
        metavar="True/False",
        type=optional_bool,
        nargs='?',
        const=True,
        default=count_records_default,
        help=lines_help,
    )

    # This is an expert option. It will not show up on `--help` without `--expert`:
    parser.add_argument(
        "--count-property",
        dest="count_property",
        default=DEFAULT_COUNT_PROPERTY,
        help=h("The property used for column count output edges. (default=%(default)s)."),
    )

    KgtkReader.add_debug_arguments(parser, expert=expert_mode)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=expert_mode)
    KgtkValueOptions.add_arguments(parser, expert=expert_mode)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Declare the command-line arguments for unreifying values.

    Adds the input file, the main output file, the optional reified,
    unreified and uninvolved output files, the KgtkUnreifyValues options,
    and the shared reader and value option groups. The default reader mode
    is looked up from the shared `_mode` argument.

    The nested help-suppression helper `h` that used to be defined here was
    never called, so it has been removed.

    Args:
        parser: the KGTK argument parser to populate.
        parsed_shared_args: the parsed shared arguments; `_expert` and
            `_mode` are read.
    """
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions, KgtkReaderMode
    from kgtk.unreify.kgtkunreifyvalues import KgtkUnreifyValues
    # NOTE(review): optional_bool is not referenced in this function; the
    # import is kept as-is -- confirm it is safe to drop.
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    _expert: bool = parsed_shared_args._expert

    parser.add_input_file(who="The KGTK input file with the reified data. " +
                          "It must have node1, label, and node2 columns, or their aliases. " +
                          "It may have an ID column;  if it does not, one will be appended to the output file. " +
                          "It may not have any additional columns. ")

    parser.add_output_file(who="The KGTK file to write output records with unreified data. " +
                           "This file may differ in shape from the input file by the addition of an ID column. " +
                           "The records in the output file will not, generally, be in the same order as they appeared in the input file. ")

    parser.add_output_file(who="A KGTK output file that will contain only the reified RDF statements.",
                           dest="reified_file",
                           options=["--reified-file"],
                           metavar="REIFIED_FILE",
                           optional=True)

    parser.add_output_file(who="A KGTK output file that will contain only the unreified RDF statements.",
                           dest="unreified_file",
                           options=["--unreified-file"],
                           metavar="UNREIFIED_FILE",
                           optional=True)

    parser.add_output_file(who="A KGTK output file that will contain only the uninvolved input.",
                           dest="uninvolved_file",
                           options=["--uninvolved-file"],
                           metavar="UNINVOLVED_FILE",
                           optional=True)

    KgtkUnreifyValues.add_arguments(parser)
    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser,
                                    mode_options=True,
                                    default_mode=KgtkReaderMode[parsed_shared_args._mode],
                                    expert=_expert)
    KgtkValueOptions.add_arguments(parser)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """
    Declare the command-line arguments for this command.

    Adds an optional positional input path, the `-o/--output-file` path, the
    `--column` and `--prefix` options, the mutually exclusive `--types` /
    `--fields` selectors, the `--overwrite`, `--expand` and
    `--show-data-types` boolean switches, and the shared reader and value
    option groups.

    Args:
        parser: the KGTK argument parser to populate.
        parsed_shared_args: the parsed shared arguments; only `_expert` is read.
    """
    expert_mode: bool = parsed_shared_args._expert

    def h(msg: str) -> str:
        # Expert-only help: outside expert mode the option still exists and
        # is initialized, but its help text is suppressed.
        return msg if expert_mode else SUPPRESS

    # The boolean switches take an optional value, mean True when given
    # without a value, and default to False.
    bool_switch = dict(type=optional_bool, nargs='?', const=True, default=False)

    parser.add_argument(
        "input_kgtk_file",
        nargs="?",
        type=Path,
        default="-",
        help="The KGTK file to filter. May be omitted or '-' for stdin (default=%(default)s).",
    )
    parser.add_argument(
        "-o", "--output-file",
        dest="output_kgtk_file",
        type=Path,
        default="-",
        help="The KGTK file to write (default=%(default)s).",
    )
    parser.add_argument(
        "--column",
        dest="column_name",
        default=KgtkFormat.NODE2,
        help="The name of the column to explode. (default=%(default)s).",
    )

    # --types and --fields cannot be combined on one command line.
    selector_group: _MutuallyExclusiveGroup = parser.add_mutually_exclusive_group()
    selector_group.add_argument(
        "--types",
        dest="type_names",
        nargs='*',
        choices=KgtkFormat.DataType.choices(),
        default=KgtkFormat.DataType.choices(),
        help="The KGTK data types for which fields should be exploded. (default=%(default)s).",
    )
    selector_group.add_argument(
        "--fields",
        dest="field_names",
        nargs='*',
        choices=KgtkValueFields.FIELD_NAMES,
        help=h("The names of the fields to extract (overrides --types). (default=%(default)s)."),
    )

    parser.add_argument(
        "--prefix",
        dest="prefix",
        default=KgtkFormat.NODE2 + ";" + KgtkFormat.KGTK_NAMESPACE,
        help="The prefix for exploded column names. (default=%(default)s).",
    )
    parser.add_argument(
        "--overwrite",
        dest="overwrite_columns",
        help="Indicate that it is OK to overwrite existing columns. (default=%(default)s).",
        **bool_switch,
    )
    parser.add_argument(
        "--expand",
        dest="expand_list",
        help="Expand the source column if it contains a list, else fail. (default=%(default)s).",
        **bool_switch,
    )
    parser.add_argument(
        "--show-data-types",
        dest="show_data_types",
        help="Print the list of data types and exit. (default=%(default)s).",
        **bool_switch,
    )

    KgtkReader.add_debug_arguments(parser, expert=expert_mode)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=expert_mode)
    KgtkValueOptions.add_arguments(parser, expert=expert_mode)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace ):
    """
    Declare the command-line arguments for this command.

    Adds the input file, the optional list of entity label files, the
    output file, five property-list options, the `--sentence-label` option,
    the `--explain`, `--presorted` and `--add-entity-labels-from-input`
    boolean switches, and the shared reader and value option groups. The
    debug and value option groups are always added with expert=False; only
    the reader options follow the shared `_expert` flag.

    Args:
        parser: the KGTK argument parser to populate.
        parsed_shared_args: the parsed shared arguments; `_expert` and
            `_mode` are read.
    """
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions, KgtkReaderMode
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    expert_mode: bool = parsed_shared_args._expert

    parser.add_input_file()
    parser.add_input_file(
        who="The entity label file(s)",
        dest="entity_label_files",
        options=['--entity-label-file'],
        metavar="ENTITY_LABEL_FILE",
        optional=True,
        allow_list=True,
        default_stdin=False,
    )
    parser.add_output_file()

    # The property-list options differ only in flag, dest, the noun used in
    # the help text, and the default that is quoted in the help text.
    property_list_options = (
        ("--label-properties", "label_properties", "label properties", DEFAULT_LABEL_PROPERTIES),
        ("--description-properties", "description_properties", "description properties", DEFAULT_DESCRIPTION_PROPERTIES),
        ("--isa-properties", "isa_properties", "isa properties", DEFAULT_ISA_PROPERTIES),
        ("--has-properties", "has_properties", "has properties", DEFAULT_HAS_PROPERTIES),
        ("--property-values", "property_values", "property values", DEFAULT_PROPERTY_VALUES),
    )
    for flag, dest_name, noun, quoted_default in property_list_options:
        parser.add_argument(
            flag,
            dest=dest_name,
            nargs="*",
            help="The %s. (default=%s)" % (noun, repr(quoted_default)),
        )

    parser.add_argument(
        '--sentence-label',
        action='store',
        type=str,
        dest='sentence_label',
        default=DEFAULT_SENTENCE_LABEL,
        help="The relationship to write in the output file. (default=%(default)s)",
    )

    # The boolean switches take an optional True|False value, mean True
    # when given without a value, and default to False.
    bool_switch = dict(metavar="True|False", type=optional_bool, nargs='?', const=True, default=False)

    parser.add_argument(
        "--explain",
        dest="explain",
        help="When true, include an explanation column that tells how the sentence was constructed. (default=%(default)s).",
        **bool_switch,
    )
    parser.add_argument(
        "--presorted",
        dest="presorted",
        help="When true, the input file is presorted on node1. (default=%(default)s).",
        **bool_switch,
    )
    parser.add_argument(
        "--add-entity-labels-from-input",
        dest="add_entity_labels_from_input",
        help="When true, extract entity labels from the unsorted input file. (default=%(default)s).",
        **bool_switch,
    )

    KgtkReader.add_debug_arguments(parser, expert=False)
    KgtkReaderOptions.add_arguments(
        parser,
        mode_options=True,
        default_mode=KgtkReaderMode[parsed_shared_args._mode],
        expert=expert_mode,
    )
    KgtkValueOptions.add_arguments(parser, expert=False)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """Register the command-line arguments for the column filter command.

    Args:
        parser: The argument parser that receives the definitions.
        parsed_shared_args: The already-parsed shared arguments; its
            ``_expert`` flag is forwarded to the reader and value option
            groups.
    """
    expert_mode: bool = parsed_shared_args._expert

    # Both boolean switches share one shape: a bare flag means True, an
    # explicit value is converted by optional_bool, absent means False.
    bool_switch: dict = dict(type=optional_bool, nargs='?', const=True, default=False)

    parser.add_argument("input_kgtk_file",
                        type=Path,
                        nargs="?",
                        help="The KGTK file to filter. May be omitted or '-' for stdin.")

    parser.add_argument("--columns",
                        dest="filter_column_names",
                        required=True,
                        nargs='+',
                        help="The columns in the file being filtered (Required).")

    parser.add_argument("--count",
                        dest="only_count",
                        help="Only count the records, do not copy them. (default=%(default)s).",
                        **bool_switch)

    parser.add_argument("-o", "--output-file",
                        dest="output_kgtk_file",
                        type=Path,
                        default="-",
                        help="The KGTK file to write (default=%(default)s).")

    parser.add_argument("--all",
                        dest="all_are",
                        help="False: Test if any are, True: test if all are (default=%(default)s).",
                        **bool_switch)

    # Standard reader, debug and value option groups.
    KgtkReader.add_debug_arguments(parser, expert=expert_mode)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=expert_mode)
    KgtkValueOptions.add_arguments(parser, expert=expert_mode)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """Register the command-line arguments for the unique-value counter.

    Args:
        parser: The KGTK argument parser that receives the definitions.
        parsed_shared_args: The already-parsed shared arguments; its
            ``_expert`` flag decides whether expert-only help is shown.
    """
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.join.unique import Unique
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    expert_mode: bool = parsed_shared_args._expert

    def expert_help(text: str) -> str:
        # Expert-only options stay registered (so their defaults are still
        # initialized) but their help is suppressed outside expert mode.
        return text if expert_mode else SUPPRESS

    parser.add_input_file(positional=True)
    parser.add_output_file()

    parser.add_argument("--column",
                        dest="column_name",
                        required=True,
                        help="The column to count unique values (required).")

    parser.add_argument("--empty",
                        dest="empty_value",
                        default="",
                        help="A value to substitute for empty values (default=%(default)s).")

    parser.add_argument("--label",
                        dest="label_value",
                        default="count",
                        help="The output file label column value (default=%(default)s).")

    # TODO: use an enum
    parser.add_argument("--format",
                        dest="output_format",
                        choices=Unique.OUTPUT_FORMATS,
                        default=Unique.DEFAULT_FORMAT,
                        help=expert_help("The output file format and mode (default=%(default)s)."))

    parser.add_argument("--prefix",
                        dest="prefix",
                        default="",
                        help=expert_help("The value prefix (default=%(default)s)."))

    # Optional record selection: a column name plus the list of values.
    parser.add_argument("--where",
                        dest="where_column_name",
                        default=None,
                        help="The name of a column for a record selection test. (default=%(default)s).")

    parser.add_argument("--in",
                        dest="where_values",
                        nargs="+",
                        default=None,
                        help="The list of values for a record selection test. (default=%(default)s).")

    # A bare --presorted means True; an explicit value goes through
    # optional_bool; omitting the flag leaves it False.
    parser.add_argument("--presorted",
                        dest="presorted",
                        metavar="True|False",
                        type=optional_bool,
                        nargs='?',
                        const=True,
                        default=False,
                        help="When True, the input file is presorted. (default=%(default)s).")

    KgtkReader.add_debug_arguments(parser, expert=expert_mode)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=expert_mode)
    KgtkValueOptions.add_arguments(parser, expert=expert_mode)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """Register the command-line arguments for the KGTK-to-HTML converter.

    Args:
        parser: The KGTK argument parser that receives the definitions.
        parsed_shared_args: The already-parsed shared arguments; its
            ``_expert`` flag decides whether expert-only help is shown.
    """
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions, KgtkReaderMode
    from kgtk.io.kgtkwriter import KgtkWriter
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    _expert: bool = parsed_shared_args._expert

    # This helper function makes it easy to suppress options from
    # the help message.  The options are still there, and initialize
    # what they need to initialize.
    def h(msg: str) -> str:
        return msg if _expert else SUPPRESS

    parser.add_input_file(who="The KGTK file to convert to an HTML table.", positional=True)
    # The help text previously read "The GitHub markdown file to write.",
    # which did not match the HTML default output format set below.
    parser.add_output_file(who="The HTML file to write.")

    # A bare --pp/--readable means True; an explicit value is converted by
    # optional_bool; omitting the flag leaves it False.
    parser.add_argument("--pp", "--readable", dest="readable",
                        help="If true, use a human-readable output format. (default=%(default)s).",
                        metavar="True/False",
                        type=optional_bool, nargs='?', const=True, default=False)

    # Expert-only: the help is hidden unless expert mode is active.
    parser.add_argument("--output-format", dest="output_format", type=str,
                        help=h("The file format (default=%(default)s)"),
                        default=KgtkWriter.OUTPUT_FORMAT_HTML_COMPACT)

    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser,
                                    mode_options=True,
                                    default_mode=KgtkReaderMode.NONE,
                                    expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """Register the positional input/output files and the standard option groups.

    Args:
        parser: The argument parser that receives the definitions.
        parsed_shared_args: The already-parsed shared arguments; its
            ``_expert`` flag is forwarded to the reader and value option
            groups.
    """
    expert_mode: bool = parsed_shared_args._expert

    # Both file arguments are optional positionals parsed as Path objects.
    parser.add_argument("input_file",
                        type=Path,
                        nargs="?",
                        help="The KGTK file to read. May be omitted or '-' for stdin.")
    parser.add_argument("output_file",
                        type=Path,
                        nargs="?",
                        help="The KGTK file to write. May be omitted or '-' for stdout.")

    KgtkReader.add_debug_arguments(parser, expert=expert_mode)
    KgtkReaderOptions.add_arguments(parser,
                                    expert=expert_mode,
                                    validate_by_default=True,
                                    mode_options=True)
    KgtkValueOptions.add_arguments(parser, expert=expert_mode)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """Register the command-line arguments for the property pattern command.

    Args:
        parser: The KGTK argument parser that receives the definitions.
        parsed_shared_args: The already-parsed shared arguments; its
            ``_expert`` flag is forwarded to the reader and value option
            groups.
    """
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderMode, KgtkReaderOptions
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    expert_mode: bool = parsed_shared_args._expert

    # Every boolean switch shares this shape: a bare flag means True and an
    # explicit value is converted by optional_bool.  Only the default varies.
    bool_switch: dict = dict(type=optional_bool, nargs='?', const=True, metavar="True|False")

    # Inputs: the data file and the pattern definitions.
    parser.add_input_file()
    parser.add_input_file(who="The property pattern definitions.",
                          dest="pattern_file",
                          options=["--pattern-file"],
                          metavar="PATTERN_FILE",
                          default_stdin=False)

    # Outputs: both the main output and the reject file are optional.
    parser.add_output_file(optional=True)
    parser.add_output_file(who="The property pattern reject output.",
                           dest="reject_file",
                           options=["--reject-file"],
                           metavar="REJECT_FILE",
                           optional=True)

    parser.add_argument("--presorted",
                        dest="grouped_input",
                        default=False,
                        help="Indicate that the input has been presorted (or at least pregrouped) on the node1 column. (default=%(default)s).",
                        **bool_switch)

    parser.add_argument("--process-node1-groups",
                        dest="reject_node1_groups",
                        default=True,
                        help="When True, process all records for a node1 value as a group. (default=%(default)s).",
                        **bool_switch)

    parser.add_argument("--no-complaints",
                        dest="no_complaints",
                        default=False,
                        help="When true, do not print complaints (when rejects are expected). (default=%(default)s).",
                        **bool_switch)

    parser.add_argument("--complain-immediately",
                        dest="complain_immediately",
                        default=False,
                        help="When true, print complaints immediately (for debugging). (default=%(default)s).",
                        **bool_switch)

    parser.add_argument("--add-isa-column",
                        dest="add_isa_column",
                        default=False,
                        help="When true, add an ISA column to the output and reject files. (default=%(default)s).",
                        **bool_switch)

    parser.add_argument("--isa-column-name",
                        dest="isa_column_name",
                        help="The name for the ISA column. (default %(default)s)",
                        default="isa;node2")

    parser.add_argument("--autovalidate",
                        dest="autovalidate",
                        default=True,
                        help="When true, validate node1 and node2 values before testing them. (default=%(default)s).",
                        **bool_switch)

    KgtkReader.add_debug_arguments(parser, expert=expert_mode)
    KgtkReaderOptions.add_arguments(parser,
                                    expert=expert_mode,
                                    validate_by_default=True,
                                    mode_options=True)
    KgtkValueOptions.add_arguments(parser, expert=expert_mode)
def add_arguments_extended(parser: KGTKArgumentParser, parsed_shared_args: Namespace):
    """Register the command-line arguments for the compaction command.

    Args:
        parser: The argument parser that receives the definitions.
        parsed_shared_args: The already-parsed shared arguments; its
            ``_expert`` flag is forwarded to the ID builder, reader and
            value option groups.
    """
    _expert: bool = parsed_shared_args._expert

    # NOTE: the nested help-suppression helper that used to live here was
    # never called by any option in this function, so it has been removed.

    parser.add_argument("input_kgtk_file", nargs="?", type=Path, default="-",
                        help="The KGTK file to filter. May be omitted or '-' for stdin (default=%(default)s).")

    # An empty list means "no explicit key columns were given".
    parser.add_argument("--columns", dest="key_column_names",
                        help="The key columns to identify records for compaction. " +
                        "(default=id for node files, (node1, label, node2, id) for edge files).",
                        nargs='+', default=[])

    # Optional-boolean switches: a bare flag means True and an explicit
    # value is converted by optional_bool.
    parser.add_argument("--compact-id", dest="compact_id",
                        help="Indicate that the ID column in KGTK edge files should be compacted. " +
                        "Normally, if the ID column exists, it is not compacted, " +
                        "as there are use cases that need to maintain distinct lists of secondary edges for each ID value. (default=%(default)s).",
                        type=optional_bool, nargs='?', const=True, default=False)

    parser.add_argument("--presorted", dest="sorted_input",
                        help="Indicate that the input has been presorted (or at least pregrouped) (default=%(default)s).",
                        type=optional_bool, nargs='?', const=True, default=False)

    parser.add_argument("--verify-sort", dest="verify_sort",
                        help="If the input has been presorted, verify its consistency (disable if only pregrouped). (default=%(default)s).",
                        type=optional_bool, nargs='?', const=True, default=True)

    parser.add_argument("-o", "--output-file", dest="output_kgtk_file",
                        help="The KGTK file to write (default=%(default)s).",
                        type=Path, default="-")

    parser.add_argument("--build-id", dest="build_id",
                        help="Build id values in an id column. (default=%(default)s).",
                        type=optional_bool, nargs='?', const=True, default=False)

    KgtkIdBuilderOptions.add_arguments(parser, expert=_expert)
    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)