Ejemplo n.º 1
0
Archivo: unique.py Proyecto: yyht/kgtk
def add_arguments_extended(parser: KGTKArgumentParser,
                           parsed_shared_args: Namespace):
    """
    Declare the command line options for counting unique column values.

    Args:
        parser: the argument parser that receives the option definitions.
        parsed_shared_args: previously parsed shared arguments; only the
            `_expert` flag is read from it.
    """
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    _expert: bool = parsed_shared_args._expert

    def expert_help(text: str) -> str:
        # Hide the option from the help output unless expert mode is on.
        # A hidden option is still registered and still gets its default.
        return text if _expert else SUPPRESS

    parser.add_input_file(positional=True)
    parser.add_output_file()

    parser.add_argument(
        "--column",
        dest="column_name",
        required=True,
        help="The column to count unique values (required).",
    )

    parser.add_argument(
        "--empty",
        dest="empty_value",
        default="",
        help="A value to substitute for empty values (default=%(default)s).",
    )

    parser.add_argument(
        "--label",
        dest="label_value",
        default="count",
        help="The output file label column value (default=%(default)s).",
    )

    # TODO: use an enum
    parser.add_argument(
        "--format",
        dest="output_format",
        choices=["edge", "node"],
        default="edge",
        help=expert_help(
            "The output file format and mode (default=%(default)s)."),
    )

    parser.add_argument(
        "--prefix",
        dest="prefix",
        default="",
        help=expert_help("The value prefix (default=%(default)s)."),
    )

    # --where names the column to test and --in lists the accepted values.
    parser.add_argument(
        "--where",
        dest="where_column_name",
        default=None,
        help="The name of a column for a record selection test. (default=%(default)s).",
    )

    parser.add_argument(
        "--in",
        dest="where_values",
        nargs="+",
        default=None,
        help="The list of values for a record selection test. (default=%(default)s).",
    )

    # Shared option groups; each honours the expert flag on its own.
    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
Ejemplo n.º 2
0
def main():
    """
    Command line driver for the KGTK implode processor.

    Builds the argument parser, parses the command line, optionally echoes
    the effective options, then constructs a KgtkImplode and runs it.
    """
    parser: ArgumentParser = ArgumentParser()

    # Keyword arguments shared by every boolean option: the value is
    # optional, and a bare flag is stored as True.
    bool_option = dict(type=optional_bool, nargs='?', const=True)

    parser.add_argument(
        dest="input_file_path",
        type=Path,
        nargs="?",
        default="-",
        help="The KGTK file with the input data. (default=%(default)s)",
    )

    parser.add_argument(
        "--column",
        dest="column_name",
        default="node2",
        help="The name of the column to explode. (default=%(default)s).",
    )

    parser.add_argument(
        "--types",
        dest="type_names",
        nargs='*',
        choices=KgtkFormat.DataType.choices(),
        default=KgtkFormat.DataType.choices(),
        help="The KGTK data types for which fields should be imploded. (default=%(default)s).",
    )

    parser.add_argument(
        "--without",
        dest="without_fields",
        nargs='*',
        choices=KgtkValueFields.OPTIONAL_DEFAULT_FIELD_NAMES,
        default=None,
        help="The KGTK fields to do without. (default=%(default)s).",
    )

    parser.add_argument(
        "-o",
        "--output-file",
        dest="output_file_path",
        type=Path,
        default="-",
        help="The KGTK file to write (default=%(default)s).",
    )

    parser.add_argument(
        "--prefix",
        dest="prefix",
        default="node2;kgtk:",
        help="The prefix for exploded column names. (default=%(default)s).",
    )

    parser.add_argument(
        "--overwrite",
        dest="overwrite_column",
        default=True,
        help="Indicate that it is OK to overwrite an existing imploded column. (default=%(default)s).",
        **bool_option)

    parser.add_argument(
        "--validate",
        dest="validate",
        default=True,
        help="Validate imploded values. (default=%(default)s).",
        **bool_option)

    parser.add_argument(
        "--escape-pipes",
        dest="escape_pipes",
        default=False,
        help="When true, pipe characters (|) need to be escaped (\\|) per KGTK file format. (default=%(default)s).",
        **bool_option)

    parser.add_argument(
        "--quantities-include-numbers",
        dest="quantities_include_numbers",
        default=True,
        help="When true, numbers are acceptable quantities. (default=%(default)s).",
        **bool_option)

    parser.add_argument(
        "--general-strings",
        dest="general_strings",
        default=True,
        help="When true, strings may include language qualified strings. (default=%(default)s).",
        **bool_option)

    parser.add_argument(
        "--remove-prefixed-columns",
        dest="remove_prefixed_columns",
        default=False,
        help="When true, remove all columns beginning with the prefix from the output file. (default=%(default)s).",
        **bool_option)

    parser.add_argument(
        "--ignore-unselected-types",
        dest="ignore_unselected_types",
        default=True,
        help="When true, input records with valid but unselected data types will be passed through to output. (default=%(default)s).",
        **bool_option)

    parser.add_argument(
        "--retain-unselected-types",
        dest="retain_unselected_types",
        default=True,
        help="When true, input records with valid but unselected data types will be retain existing data on output. (default=%(default)s).",
        **bool_option)

    parser.add_argument(
        "--build-id",
        dest="build_id",
        default=False,
        help="Build id values in an id column. (default=%(default)s).",
        **bool_option)

    parser.add_argument(
        "--reject-file",
        dest="reject_file_path",
        type=Path,
        default=None,
        help="The KGTK file into which to write rejected records (default=%(default)s).",
    )

    # Option groups contributed by the shared KGTK helper classes.
    KgtkIdBuilderOptions.add_arguments(parser)
    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    # Diagnostics go to stderr unless errors_to_stdout was requested.
    error_file: typing.TextIO = sys.stderr
    if args.errors_to_stdout:
        error_file = sys.stdout

    # Build the option structures.
    idbuilder_options: KgtkIdBuilderOptions = KgtkIdBuilderOptions.from_args(
        args)
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    # Show the final option structures for debugging and documentation.
    if args.show_options:

        def show(line: str) -> None:
            # Every option line is written and flushed immediately.
            print(line, file=error_file, flush=True)

        show("input: %s" % str(args.input_file_path))
        show("--column %s" % args.column_name)
        show("--prefix %s" % args.prefix)
        show("--overwrite %s" % str(args.overwrite_column))
        show("--validate %s" % str(args.validate))
        show("--escape-pipes %s" % str(args.escape_pipes))
        show("--quantities-include-numbers %s" %
             str(args.quantities_include_numbers))
        show("--general-strings %s" % str(args.general_strings))
        show("--remove-prefixed-columns %s" %
             str(args.remove_prefixed_columns))
        show("--ignore-unselected-types %s" %
             str(args.ignore_unselected_types))
        show("--retain-unselected-types %s" %
             str(args.retain_unselected_types))
        if args.type_names is not None:
            show("--types %s" % " ".join(args.type_names))
        if args.without_fields is not None:
            show("--without %s" % " ".join(args.without_fields))
        show("--output-file=%s" % str(args.output_file_path))
        if args.reject_file_path is not None:
            show("--reject-file=%s" % str(args.reject_file_path))
        show("--build-id=%s" % str(args.build_id))
        idbuilder_options.show(out=error_file)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    # An omitted --without is handed to the processor as an empty list.
    without_fields: typing.List[str] = []
    if args.without_fields is not None:
        without_fields = args.without_fields

    imploder: KgtkImplode = KgtkImplode(
        input_file_path=args.input_file_path,
        column_name=args.column_name,
        prefix=args.prefix,
        type_names=args.type_names,
        without_fields=without_fields,
        overwrite_column=args.overwrite_column,
        validate=args.validate,
        escape_pipes=args.escape_pipes,
        quantities_include_numbers=args.quantities_include_numbers,
        general_strings=args.general_strings,
        remove_prefixed_columns=args.remove_prefixed_columns,
        ignore_unselected_types=args.ignore_unselected_types,
        retain_unselected_types=args.retain_unselected_types,
        output_file_path=args.output_file_path,
        reject_file_path=args.reject_file_path,
        build_id=args.build_id,
        idbuilder_options=idbuilder_options,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose)

    imploder.process()
Ejemplo n.º 3
0
def add_arguments_extended(parser: KGTKArgumentParser,
                           parsed_shared_args: Namespace):
    """
    Declare the command line options for building sentences from edges.

    Args:
        parser: the argument parser that receives the option definitions.
        parsed_shared_args: previously parsed shared arguments; only the
            `_expert` flag is read from it.
    """
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    # NOTE(review): `_expert` is read here but the shared option groups at
    # the end of this function are registered with expert=False -- confirm
    # that this is intended.
    _expert: bool = parsed_shared_args._expert

    # Keyword arguments shared by every boolean option: the value is
    # optional, and a bare flag is stored as True.
    bool_option = dict(type=optional_bool,
                       nargs='?',
                       const=True,
                       metavar="True|False")

    parser.add_input_file()
    parser.add_input_file(
        who="The entity label file(s)",
        dest="entity_label_files",
        options=['--entity-label-file'],
        metavar="ENTITY_LABEL_FILE",
        optional=True,
        allow_list=True,
        default_stdin=False,
    )
    parser.add_output_file()

    # The property list options differ only in name, destination and the
    # default shown in the help text.  No argparse default is set here.
    property_options = (
        ("--label-properties", "label_properties",
         "The label properties. (default=%s)", DEFAULT_LABEL_PROPERTIES),
        ("--description-properties", "description_properties",
         "The description properties. (default=%s)",
         DEFAULT_DESCRIPTION_PROPERTIES),
        ("--isa-properties", "isa_properties",
         "The isa properties. (default=%s)", DEFAULT_ISA_PROPERTIES),
        ("--has-properties", "has_properties",
         "The has properties. (default=%s)", DEFAULT_HAS_PROPERTIES),
        ("--property-values", "property_values",
         "The property values. (default=%s)", DEFAULT_PROPERTY_VALUES),
    )
    for option, destination, help_template, shown_default in property_options:
        parser.add_argument(option,
                            dest=destination,
                            nargs="*",
                            help=help_template % repr(shown_default))

    parser.add_argument(
        '--sentence-label',
        action='store',
        type=str,
        dest='sentence_label',
        default=DEFAULT_SENTENCE_LABEL,
        help="The relationship to write in the output file. (default=%(default)s)",
    )

    parser.add_argument(
        "--explain",
        dest="explain",
        default=False,
        help="When true, include an explanation column that tells how the sentence was constructed. (default=%(default)s).",
        **bool_option)

    parser.add_argument(
        "--presorted",
        dest="presorted",
        default=False,
        help="When true, the input file is presorted on node1. (default=%(default)s).",
        **bool_option)

    parser.add_argument(
        "--add-entity-labels-from-input",
        dest="add_entity_labels_from_input",
        default=False,
        help="When true, extract entity labels from the unsorted input file. (default=%(default)s).",
        **bool_option)

    KgtkReader.add_debug_arguments(parser, expert=False)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=False)
    KgtkValueOptions.add_arguments(parser, expert=False)
Ejemplo n.º 4
0
def main():
    """
    Command line driver for the KGTK ifexists processor.

    Builds the argument parser, parses the command line, optionally echoes
    the effective options, then constructs a KgtkIfExists and runs it.
    """
    parser: ArgumentParser = ArgumentParser()

    # Keyword arguments shared by every boolean option: the value is
    # optional, and a bare flag is stored as True.
    bool_option = dict(type=optional_bool, nargs='?', const=True)

    parser.add_argument(
        dest="input_file_path",
        type=Path,
        nargs="?",
        help="The KGTK file with the input data",
    )

    parser.add_argument(
        "--filter-on",
        dest="filter_file_path",
        type=Path,
        required=True,
        help="The KGTK file with the filter data (required).",
    )

    parser.add_argument(
        "-o",
        "--output-file",
        dest="output_file_path",
        type=Path,
        default="-",
        help="The KGTK file to write (default=%(default)s).",
    )

    parser.add_argument(
        "--field-separator",
        dest="field_separator",
        default=KgtkIfExists.FIELD_SEPARATOR_DEFAULT,
        help="Separator for multifield keys (default=%(default)s)",
    )

    parser.add_argument(
        "--invert",
        dest="invert",
        default=False,
        help="Invert the test (if not exists) (default=%(default)s).",
        **bool_option)

    parser.add_argument(
        "--cache-input",
        dest="cache_input",
        default=False,
        help="Cache the input file instead of the filter keys. (default=%(default)s).",
        **bool_option)

    parser.add_argument(
        "--preserve-order",
        dest="preserve_order",
        default=False,
        help="Preserve record order when cacheing the input file. (default=%(default)s).",
        **bool_option)

    parser.add_argument(
        "--input-keys",
        dest="input_keys",
        nargs='*',
        help="The key columns in the input file (default=None).",
    )
    parser.add_argument(
        "--filter-keys",
        dest="filter_keys",
        nargs='*',
        help="The key columns in the filter file (default=None).",
    )

    # Reader options are registered twice, once per file role.
    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, who="input")
    KgtkReaderOptions.add_arguments(parser, mode_options=True, who="filter")
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    # Diagnostics go to stderr unless errors_to_stdout was requested.
    error_file: typing.TextIO = sys.stderr
    if args.errors_to_stdout:
        error_file = sys.stdout

    # Build the option structures.
    input_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(
        args, who="input")
    filter_reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(
        args, who="filter")
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    # Show the final option structures for debugging and documentation.
    if args.show_options:

        def show(line: str) -> None:
            print(line, file=error_file)

        # An omitted input path is reported as "-".
        if args.input_file_path is None:
            input_name = "-"
        else:
            input_name = str(args.input_file_path)
        show("input: %s" % input_name)
        show("--filter-on=%s" % str(args.filter_file_path))
        show("--output-file=%s" % str(args.output_file_path))
        show("--field-separator=%s" % repr(args.field_separator))
        show("--invert=%s" % str(args.invert))
        show("--cache-input=%s" % str(args.cache_input))
        show("--preserve-order=%s" % str(args.preserve_order))
        if args.input_keys is not None:
            show("--input-keys %s" % " ".join(args.input_keys))
        if args.filter_keys is not None:
            show("--filter-keys %s" % " ".join(args.filter_keys))
        input_reader_options.show(out=error_file, who="input")
        filter_reader_options.show(out=error_file, who="filter")
        value_options.show(out=error_file)

    processor: KgtkIfExists = KgtkIfExists(
        input_file_path=args.input_file_path,
        input_keys=args.input_keys,
        filter_file_path=args.filter_file_path,
        filter_keys=args.filter_keys,
        output_file_path=args.output_file_path,
        field_separator=args.field_separator,
        invert=args.invert,
        cache_input=args.cache_input,
        preserve_order=args.preserve_order,
        input_reader_options=input_reader_options,
        filter_reader_options=filter_reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose)

    processor.process()
Ejemplo n.º 5
0
def main():
    """
    Command line driver for the KGTK file concatenator.

    Builds the argument parser, parses the command line, optionally echoes
    the effective options, then constructs a KgtkCat and runs it.
    """
    parser = ArgumentParser()

    parser.add_argument(
        dest="input_file_paths",
        type=Path,
        nargs='+',
        help="The KGTK files to concatenate",
    )
    parser.add_argument(
        "-o",
        "--output-file",
        dest="output_file_path",
        type=Path,
        default="-",
        help="The KGTK file to write (default=%(default)s)",
    )

    parser.add_argument(
        "--output-format",
        dest="output_format",
        type=str,
        choices=KgtkWriter.OUTPUT_FORMAT_CHOICES,
        help="The file format (default=kgtk)",
    )

    # Column renaming options; each takes one or more column names.
    parser.add_argument(
        "--output-columns",
        dest="output_column_names",
        type=str,
        nargs='+',
        help="Rename all output columns. (default=%(default)s)",
    )
    parser.add_argument(
        "--old-columns",
        dest="old_column_names",
        type=str,
        nargs='+',
        help="Rename seleted output columns: old names. (default=%(default)s)",
    )
    parser.add_argument(
        "--new-columns",
        dest="new_column_names",
        type=str,
        nargs='+',
        help="Rename seleted output columns: new names. (default=%(default)s)",
    )

    KgtkReader.add_debug_arguments(parser, expert=True)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=True)
    KgtkValueOptions.add_arguments(parser, expert=True)

    args = parser.parse_args()

    # Diagnostics go to stderr unless errors_to_stdout was requested.
    error_file: typing.TextIO = sys.stderr
    if args.errors_to_stdout:
        error_file = sys.stdout

    # Build the option structures.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    # Show the final option structures for debugging and documentation.
    if args.show_options:

        def show(line: str) -> None:
            # Every option line is written and flushed immediately.
            print(line, file=error_file, flush=True)

        input_files: typing.List[str] = [
            str(input_file) for input_file in args.input_file_paths
        ]
        show("input: %s" % " ".join(input_files))
        show("--output-file=%s" % args.output_file_path)
        # The optional settings are echoed only when they were supplied.
        if args.output_format is not None:
            show("--output-format=%s" % args.output_format)
        if args.output_column_names is not None:
            show("--output-columns=%s" % " ".join(args.output_column_names))
        if args.old_column_names is not None:
            show("--old-columns=%s" % " ".join(args.old_column_names))
        if args.new_column_names is not None:
            show("--new-columns=%s" % " ".join(args.new_column_names))
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    concatenator: KgtkCat = KgtkCat(
        input_file_paths=args.input_file_paths,
        output_path=args.output_file_path,
        output_format=args.output_format,
        output_column_names=args.output_column_names,
        old_column_names=args.old_column_names,
        new_column_names=args.new_column_names,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose)

    concatenator.process()
Ejemplo n.º 6
0
def add_arguments_extended(parser: KGTKArgumentParser,
                           parsed_shared_args: Namespace):
    """
    Declare the command line options for exploding a column into fields.

    Args:
        parser: the argument parser that receives the option definitions.
        parsed_shared_args: previously parsed shared arguments; only the
            `_expert` flag is read from it.
    """
    from kgtk.kgtkformat import KgtkFormat
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.io.kgtkwriter import KgtkWriter
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalue import KgtkValueFields
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    _expert: bool = parsed_shared_args._expert

    def expert_help(text: str) -> str:
        # Hide the option from the help output unless expert mode is on.
        # A hidden option is still registered and still gets its default.
        return text if _expert else SUPPRESS

    # Keyword arguments shared by every boolean option: the value is
    # optional, and a bare flag is stored as True.
    bool_option = dict(type=optional_bool,
                       nargs='?',
                       const=True,
                       metavar="True|False")

    parser.add_input_file(positional=True)
    parser.add_output_file()

    parser.add_argument(
        "--column",
        dest="column_name",
        default=KgtkFormat.NODE2,
        help="The name of the column to explode. (default=%(default)s).",
    )

    # --types and --fields cannot be given together.
    selection_group: _MutuallyExclusiveGroup = parser.add_mutually_exclusive_group()
    selection_group.add_argument(
        "--types",
        dest="type_names",
        nargs='*',
        choices=KgtkFormat.DataType.choices(),
        default=KgtkFormat.DataType.choices(),
        help="The KGTK data types for which fields should be exploded. (default=%(default)s).",
    )

    selection_group.add_argument(
        "--fields",
        dest="field_names",
        nargs='*',
        choices=KgtkValueFields.FIELD_NAMES,
        help=expert_help(
            "The names of the fields to extract (overrides --types). (default=%(default)s)."
        ),
    )

    parser.add_argument(
        "--prefix",
        dest="prefix",
        default=KgtkFormat.NODE2 + ";" + KgtkFormat.KGTK_NAMESPACE,
        help="The prefix for exploded column names. (default=%(default)s).",
    )

    parser.add_argument(
        "--overwrite",
        dest="overwrite_columns",
        default=False,
        help="Indicate that it is OK to overwrite existing columns. (default=%(default)s).",
        **bool_option)

    parser.add_argument(
        "--expand",
        dest="expand_list",
        default=False,
        help="When True, expand source cells that contain a lists, else fail if a source cell contains a list. (default=%(default)s).",
        **bool_option)

    parser.add_argument(
        "--show-data-types",
        dest="show_data_types",
        default=False,
        help="Print the list of data types and exit. (default=%(default)s).",
        **bool_option)

    parser.add_argument(
        "--show-field-names",
        dest="show_field_names",
        default=False,
        help="Print the list of field names and exit. (default=%(default)s).",
        **bool_option)

    parser.add_argument(
        "--show-field-formats",
        dest="show_field_formats",
        default=False,
        help="Print the list of field names and formats, then exit. (default=%(default)s).",
        **bool_option)

    parser.add_argument(
        "--output-format",
        dest="output_format",
        type=str,
        choices=KgtkWriter.OUTPUT_FORMAT_CHOICES,
        help="The file format (default=kgtk)",
    )

    # Shared option groups; each honours the expert flag on its own.
    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
Ejemplo n.º 7
0
def add_arguments_extended(parser: KGTKArgumentParser,
                           parsed_shared_args: Namespace):
    """
    Declare the command line options for lowering and normalizing columns.

    Args:
        parser: the argument parser that receives the option definitions.
        parsed_shared_args: previously parsed shared arguments; only the
            `_expert` flag is read from it.
    """

    _expert: bool = parsed_shared_args._expert

    def expert_help(text: str) -> str:
        # Hide the option from the help output unless expert mode is on.
        # A hidden option is still registered and still gets its default.
        return text if _expert else SUPPRESS

    # Keyword arguments shared by every boolean option: the value is
    # optional, and a bare flag is stored as True.
    bool_option = dict(type=optional_bool,
                       nargs='?',
                       const=True,
                       metavar="True|False")

    parser.add_input_file(positional=True)
    parser.add_output_file()
    parser.add_output_file(
        who="An optional output file for new edges (normalized and/or lowered). " +
        "If omitted, new edges will go in the main output file.",
        dest="new_edges_file",
        options=["--new-edges-file"],
        metavar="NEW_EDGES_FILE",
        optional=True,
    )

    # Three spellings of the same option, all stored in columns_to_lower.
    parser.add_argument(
        "--columns",
        "--columns-to-lower",
        "--columns-to-remove",
        action="store",
        type=str,
        dest="columns_to_lower",
        nargs='+',
        help="Columns to lower and remove as a space-separated list. (default=all columns other than key columns)",
    )

    parser.add_argument(
        "--base-columns",
        dest="base_columns",
        nargs='*',
        help=expert_help(
            "Optionally, explicitly list the base column for each column being lowered. "
            +
            " --base-columns and --columns-to-lower must have the same number of entries."
        ),
    )

    parser.add_argument(
        "--label-value",
        action="store",
        type=str,
        dest="label_value",
        default=KgtkLift.DEFAULT_LABEL_SELECT_COLUMN_VALUE,
        help=expert_help(
            "The label value to use for lowered edges when --base-columns is used. (default=%(default)s)"
        ),
    )

    parser.add_argument(
        "--lift-separator",
        dest="lift_separator",
        default=KgtkLift.DEFAULT_OUTPUT_LIFTED_COLUMN_SEPARATOR,
        help=expert_help(
            "The separator between the base column and the label value. (default=%(default)s)."
        ),
    )

    parser.add_argument(
        "--lower",
        dest="lower",
        default=True,
        help="When True, lower columns that match a lift pattern. (default=%(default)s)",
        **bool_option)

    parser.add_argument(
        "--normalize",
        dest="normalize",
        default=True,
        help="When True, normalize columns that do not match a lift pattern. (default=%(default)s)",
        **bool_option)

    parser.add_argument(
        "--deduplicate-new-edges",
        dest="deduplicate_new_edges",
        default=True,
        help="When True, deduplicate new edges. Not suitable for large files. (default=%(default)s).",
        **bool_option)

    # Shared option groups; the reader's default mode is set to EDGE.
    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(
        parser,
        mode_options=True,
        default_mode=KgtkReaderMode.EDGE,
        expert=_expert,
    )
    KgtkValueOptions.add_arguments(parser, expert=_expert)
Ejemplo n.º 8
0
def main():
    """
    Command line driver for the KGTK ifempty processor.

    Builds the argument parser, parses the command line, optionally echoes
    the effective options (the tool's own options as well as the shared
    reader and value options), then constructs a KgtkIfEmpty and runs it.
    """
    parser: ArgumentParser = ArgumentParser()

    parser.add_argument(dest="input_file_path",
                        help="The KGTK file with the input data",
                        type=Path,
                        nargs="?")

    # The option is required, so the help text says so instead of
    # advertising a default that can never apply.
    parser.add_argument("--columns",
                        dest="filter_column_names",
                        help="The columns to filter on (required).",
                        nargs='+',
                        required=True)

    parser.add_argument(
        "--count",
        dest="only_count",
        help="Only count the records, do not copy them. (default=%(default)s).",
        type=optional_bool,
        nargs='?',
        const=True,
        default=False)

    parser.add_argument("-o",
                        "--output-file",
                        dest="output_file_path",
                        help="The KGTK file to write (default=%(default)s).",
                        type=Path,
                        default="-")

    parser.add_argument(
        "--all",
        dest="all_are",
        help=
        "False: Test if any are, True: test if all are (default=%(default)s).",
        type=optional_bool,
        nargs='?',
        const=True,
        default=False)

    parser.add_argument(
        "--not-empty",
        dest="notempty",
        help=
        "False: test if empty, True: test if not empty (default=%(default)s).",
        type=optional_bool,
        nargs='?',
        const=True,
        default=False)

    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    # Diagnostics go to stdout when errors_to_stdout is set, else stderr.
    error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr

    # Build the option structures.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    # Show the final option structures for debugging and documentation.
    if args.show_options:
        # The input path has no default, so an omitted path is shown as "-".
        print("input: %s" % (str(args.input_file_path)
                             if args.input_file_path is not None else "-"),
              file=error_file,
              flush=True)
        print("--columns %s" % " ".join(args.filter_column_names),
              file=error_file,
              flush=True)
        print("--count=%s" % str(args.only_count),
              file=error_file,
              flush=True)
        print("--output-file=%s" % str(args.output_file_path),
              file=error_file,
              flush=True)
        print("--all=%s" % str(args.all_are), file=error_file, flush=True)
        print("--not-empty=%s" % str(args.notempty),
              file=error_file,
              flush=True)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    ie: KgtkIfEmpty = KgtkIfEmpty(input_file_path=args.input_file_path,
                                  filter_column_names=args.filter_column_names,
                                  output_file_path=args.output_file_path,
                                  all_are=args.all_are,
                                  notempty=args.notempty,
                                  only_count=args.only_count,
                                  reader_options=reader_options,
                                  value_options=value_options,
                                  error_file=error_file,
                                  verbose=args.verbose,
                                  very_verbose=args.very_verbose)

    ie.process()
Ejemplo n.º 9
0
def main():
    """
    Test the KGTK unique processor.

    Parses the command line, optionally echoes the effective options to the
    error stream (when ``--show-options`` is set), then constructs a
    ``Unique`` processor from the parsed arguments and runs it.
    """
    parser: ArgumentParser = ArgumentParser()

    parser.add_argument(dest="input_file_path", help="The KGTK file with the input data", type=Path, nargs="?")

    parser.add_argument(      "--column", dest="column_name", help="The column to count unique values (required).", required=True)

    parser.add_argument(      "--empty", dest="empty_value", help="A value to substitute for empty values (default=%(default)s).", default="")

    parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to write (default=%(default)s).", type=Path, default="-")

    parser.add_argument(      "--label", dest="label_value", help="The output file label column value (default=%(default)s).", default="count")

    # TODO: use an enum
    parser.add_argument(      "--format", dest="output_format", help="The output file format and mode (default=%(default)s).",
                              default=Unique.DEFAULT_FORMAT, choices=Unique.OUTPUT_FORMATS)

    parser.add_argument(      "--prefix", dest="prefix", help="The value prefix (default=%(default)s).", default="")

    parser.add_argument(      "--where", dest="where_column_name",
                              help="The name of a column for a record selection test. (default=%(default)s).", default=None)

    parser.add_argument(      "--in", dest="where_values", nargs="+",
                              help="The list of values for a record selection test. (default=%(default)s).", default=None)

    # With nargs='?' and const=True, a bare "--presorted" yields True.
    parser.add_argument(      "--presorted", dest="presorted", metavar="True|False",
                              help="When True, the input file is presorted. (default=%(default)s).",
                              type=optional_bool, nargs='?', const=True, default=False)

    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    # Diagnostics go to stdout only on request, so they do not mix with
    # data written to the default output ("-").
    error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr

    # Build the option structures.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    # Show the final option structures for debugging and documentation.
    if args.show_options:
        print("input: %s" % (str(args.input_file_path) if args.input_file_path is not None else "-"), file=error_file)
        print("--column=%s" % args.column_name, file=error_file)
        print("--empty=%s" % args.empty_value, file=error_file)
        print("--output-file=%s" % str(args.output_file_path), file=error_file)
        print("--label=%s" % args.label_value, file=error_file)
        print("--format=%s" % args.output_format, file=error_file)
        print("--prefix=%s" % args.prefix, file=error_file)
        if args.where_column_name is not None:
            print("--where=%s" % args.where_column_name, file=error_file)
        if args.where_values:
            print("--in=%s" % " ".join(args.where_values), file=error_file)
        # Label this line with the option it actually reports.
        print("--presorted=%s" % repr(args.presorted), file=error_file)

        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    uniq: Unique = Unique(
        input_file_path=args.input_file_path,
        column_name=args.column_name,
        output_file_path=args.output_file_path,
        empty_value=args.empty_value,
        label_value=args.label_value,
        output_format=args.output_format,
        prefix=args.prefix,
        where_column_name=args.where_column_name,
        where_values=args.where_values,
        presorted=args.presorted,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose)

    uniq.process()
Ejemplo n.º 10
0
def main():
    """
    Test the KGTK explode processor.

    Parses the command line, optionally echoes the effective options to the
    error stream (when ``--show-options`` is set), then constructs a
    ``KgtkExplode`` processor from the parsed arguments and runs it.
    """
    parser: ArgumentParser = ArgumentParser()

    parser.add_argument(dest="input_file_path", help="The KGTK file with the input data. (default=%(default)s)", type=Path, nargs="?", default="-")

    parser.add_argument(      "--column", dest="column_name", help="The name of the column to explode. (default=%(default)s).", default="node2")

    # --types and --fields are alternatives; argparse rejects a command line
    # that supplies both.
    fgroup = parser.add_mutually_exclusive_group()

    fgroup.add_argument(      "--types", dest="type_names", nargs='*',
                               help="The KGTK data types for which fields should be exploded. (default=%(default)s).",
                               choices=KgtkFormat.DataType.choices(),
                               default=KgtkFormat.DataType.choices())

    fgroup.add_argument(      "--fields", dest="field_names",  nargs='*',
                              help="The names of the fields to extract (overrides --types). (default=%(default)s).",
                              choices=KgtkValueFields.FIELD_NAMES)

    parser.add_argument("-o", "--output-file", dest="output_file_path", help="The KGTK file to write (default=%(default)s).", type=Path, default="-")

    parser.add_argument(      "--prefix", dest="prefix", help="The prefix for exploded column names. (default=%(default)s).", default="node2;kgtk:")

    parser.add_argument(      "--overwrite", dest="overwrite_columns",
                              help="Indicate that it is OK to overwrite existing columns. (default=%(default)s).",
                              type=optional_bool, nargs='?', const=True, default=False)

    parser.add_argument(      "--expand", dest="expand_list",
                              help="Expand the source column if it contains a list, else fail. (default=%(default)s).",
                              type=optional_bool, nargs='?', const=True, default=False)

    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr

    # Build the option structures.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    # Show the final option structures for debugging and documentation.
    if args.show_options:
        print("input: %s" % str(args.input_file_path), file=error_file, flush=True)
        print("--column %s" % args.column_name, file=error_file, flush=True)
        print("--prefix %s" % args.prefix, file=error_file, flush=True)
        print("--overwrite %s" % str(args.overwrite_columns), file=error_file, flush=True)
        print("--expand %s" % str(args.expand_list), file=error_file, flush=True)
        if args.field_names is not None:
            print("--fields %s" % " ".join(args.field_names), file=error_file, flush=True)
        if args.type_names is not None:
            print("--types %s" % " ".join(args.type_names), file=error_file, flush=True)
        # Send this line to error_file like the others; printing it to stdout
        # would interleave with data when the output file is "-".
        print("--output-file=%s" % str(args.output_file_path), file=error_file, flush=True)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    ex: KgtkExplode = KgtkExplode(
        input_file_path=args.input_file_path,
        column_name=args.column_name,
        prefix=args.prefix,
        field_names=args.field_names,
        type_names=args.type_names,
        overwrite_columns=args.overwrite_columns,
        expand_list=args.expand_list,
        output_file_path=args.output_file_path,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose)

    ex.process()
Ejemplo n.º 11
0
def add_arguments_extended(parser: KGTKArgumentParser,
                           parsed_shared_args: Namespace):
    """
    Register the filter command's options on the supplied parser.

    Args:
        parser (argparse.ArgumentParser): the parser to receive the options.
        parsed_shared_args (Namespace): shared arguments; only ``_expert``
            is read here and forwarded to the common option groups.
    """
    _expert: bool = parsed_shared_args._expert

    # Example pattern: '$label == "/r/DefinedAs" && $node2=="/c/en/number_zero"'

    # File arguments: positional input, then the accept and reject outputs.
    parser.add_argument("input_kgtk_file", type=Path, nargs="?", default="-",
                        help="The KGTK file to filter. May be omitted or '-' for stdin.")
    parser.add_argument("-o", "--output-file", dest="output_kgtk_file", type=Path, default="-",
                        help="The KGTK file to write records that pass the filter (default=%(default)s).")
    parser.add_argument("--reject-file", dest="reject_kgtk_file", type=Path, default=None,
                        help="The KGTK file to write records that fail the filter (default=%(default)s).")

    parser.add_argument('-p', '--pattern', dest="pattern", action="store", type=str, required=True,
                        help="Pattern to filter on, for instance, \" ; P154 ; \" ")

    # The three column-name overrides share one shape.
    column_options = (
        ('--subj', 'subj_col', "Subject column, default is node1"),
        ('--pred', 'pred_col', "Predicate column, default is label"),
        ('--obj', 'obj_col', "Object column, default is node2"),
    )
    for flag, dest_name, help_text in column_options:
        parser.add_argument(flag, action="store", type=str, dest=dest_name, help=help_text)

    # Boolean switches: a bare flag yields True (const); the default is False.
    switch_options = (
        ("--or", "or_pattern",
         "'Or' the clauses of the pattern. (default=%(default)s)."),
        ("--invert", "invert",
         "Invert the result of applying the pattern. (default=%(default)s)."),
    )
    for flag, dest_name, help_text in switch_options:
        parser.add_argument(flag, dest=dest_name, help=help_text,
                            type=optional_bool, nargs='?', const=True, default=False)

    # Common KGTK option groups.
    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
Ejemplo n.º 12
0
def add_arguments_extended(parser: KGTKArgumentParser,
                           parsed_shared_args: Namespace):
    """
    Register the command's options on the supplied parser: an input file,
    a filter file, key-column selections, and caching controls.

    Args:
        parser (argparse.ArgumentParser): the parser to receive the options.
        parsed_shared_args (Namespace): shared arguments; only ``_expert``
            is read here.
    """
    _expert: bool = parsed_shared_args._expert

    def h(msg: str) -> str:
        # Outside expert mode the help text is replaced by SUPPRESS, which
        # hides the option from the help message; the option itself is
        # still registered.
        return msg if _expert else SUPPRESS

    parser.add_input_file(positional=True)
    parser.add_input_file(who="The KGTK file to filter against.",
                          options=["--filter-on"],
                          dest="filter_file",
                          metavar="FILTER_FILE")
    parser.add_output_file()

    # Key column lists, each with an alternate option spelling.
    parser.add_argument("--input-keys", "--left-keys", dest="input_keys", nargs='*',
                        help="The key columns in the file being filtered (default=None).")
    parser.add_argument("--filter-keys", "--right-keys", dest="filter_keys", nargs='*',
                        help="The key columns in the filter-on file (default=None).")

    # Boolean switches: a bare flag yields True (const); the default is False.
    switch_kwargs = dict(metavar="True|False", type=optional_bool,
                         nargs='?', const=True, default=False)
    parser.add_argument(
        "--cache-input", dest="cache_input",
        help="Cache the input file instead of the filter keys (default=%(default)s).",
        **switch_kwargs)
    parser.add_argument(
        "--preserve-order", dest="preserve_order",
        help="Preserve record order when cacheing the input file. (default=%(default)s).",
        **switch_kwargs)

    parser.add_argument("--field-separator", dest="field_separator",
                        default=KgtkIfExists.FIELD_SEPARATOR_DEFAULT,
                        help=h("Separator for multifield keys (default=%(default)s)"))

    KgtkReader.add_debug_arguments(parser, expert=_expert)
    # Reader options are registered once without a "who", then once each for
    # the input and filter files with defaults=False.
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert)
    for file_role in ("input", "filter"):
        KgtkReaderOptions.add_arguments(parser,
                                        mode_options=True,
                                        who=file_role,
                                        expert=_expert,
                                        defaults=False)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
Ejemplo n.º 13
0
Archivo: filter.py Proyecto: yyht/kgtk
def add_arguments_extended(parser: KGTKArgumentParser,
                           parsed_shared_args: Namespace):
    """
    Register the filter command's options on the supplied parser.

    Args:
        parser (argparse.ArgumentParser): the parser to receive the options.
        parsed_shared_args (Namespace): shared arguments; only ``_expert``
            is read here and forwarded to the common option groups.
    """
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    _expert: bool = parsed_shared_args._expert

    # Example pattern: '$label == "/r/DefinedAs" && $node2=="/c/en/number_zero"'

    # Files: positional input, the accept output, and an optional reject output.
    parser.add_input_file(positional=True)
    parser.add_output_file(
        who="The KGTK output file for records that pass the filter.")
    parser.add_output_file(
        who="The KGTK reject file for records that fail the filter.",
        dest="reject_file",
        options=["--reject-file"],
        metavar="REJECT_FILE",
        optional=True)

    parser.add_argument('-p', '--pattern', dest="pattern", action="store", type=str, required=True,
                        help="Pattern to filter on, for instance, \" ; P154 ; \" ")

    # The three column-name overrides share one shape.
    for flag, dest_name, help_text in (
            ('--subj', 'subj_col', "Subject column, default is node1"),
            ('--pred', 'pred_col', "Predicate column, default is label"),
            ('--obj', 'obj_col', "Object column, default is node2"),
    ):
        parser.add_argument(flag, action="store", type=str, dest=dest_name, help=help_text)

    # Boolean switches: a bare flag yields True (const); the default is False.
    switch_kwargs = dict(type=optional_bool, nargs='?', const=True, default=False)

    parser.add_argument(
        "--or", dest="or_pattern", metavar="True|False",
        help="'Or' the clauses of the pattern. (default=%(default)s).",
        **switch_kwargs)

    parser.add_argument(
        "--invert", dest="invert", metavar="True|False",
        help="Invert the result of applying the pattern. (default=%(default)s).",
        **switch_kwargs)

    parser.add_argument(
        "--show-version", dest="show_version", metavar="True/False",
        help="Print the version of this program. (default=%(default)s).",
        **switch_kwargs)

    # Common KGTK option groups.
    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
Ejemplo n.º 14
0
def add_arguments_extended(parser: KGTKArgumentParser,
                           parsed_shared_args: Namespace):
    """
    Register the compaction options on the supplied parser.

    The same options are registered whichever command is running. When the
    command equals DEDUP_COMMAND, the help for --columns, --compact-id,
    --deduplicate and --lists-in-input is shown only in expert mode, and
    --deduplicate defaults to True instead of False.

    Args:
        parser (argparse.ArgumentParser): the parser to receive the options.
        parsed_shared_args (Namespace): shared arguments; ``_command`` and
            ``_expert`` are read here.
    """
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.reshape.kgtkidbuilder import KgtkIdBuilder, KgtkIdBuilderOptions
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    _command: str = parsed_shared_args._command
    _expert: bool = parsed_shared_args._expert

    def h(msg: str) -> str:
        # Outside expert mode the help text is replaced by SUPPRESS, which
        # hides the option from the help message; the option itself is
        # still registered.
        return msg if _expert else SUPPRESS

    dedup_mode: bool = _command == DEDUP_COMMAND

    def command_help(msg: str) -> str:
        # Under the dedup command these options are expert-only; otherwise
        # their help is always shown.
        return h(msg) if dedup_mode else msg

    parser.add_input_file(positional=True)
    parser.add_output_file()

    parser.add_argument(
        "--columns",
        dest="key_column_names",
        nargs='+',
        default=[],
        help=command_help(
            "The key columns to identify records for compaction. "
            "(default=id for node files, (node1, label, node2, id) for edge files)."))

    parser.add_argument(
        "--compact-id",
        dest="compact_id",
        type=optional_bool,
        nargs='?',
        const=True,
        default=False,
        metavar="True|False",
        help=command_help(
            "Indicate that the ID column in KGTK edge files should be compacted. "
            "Normally, if the ID column exists, it is not compacted, "
            "as there are use cases that need to maintain distinct lists of secondary edges for each ID value. (default=%(default)s)."))

    parser.add_argument(
        "--deduplicate",
        dest="deduplicate",
        type=optional_bool,
        nargs='?',
        const=True,
        # True for the dedup command, False otherwise.
        default=dedup_mode,
        metavar="True|False",
        help=command_help(
            "Treat all columns as key columns, overriding --columns and --compact-id. "
            "This will remove completely duplicate records without compacting any new lists. "
            "(default=%(default)s)."))

    parser.add_argument(
        "--lists-in-input",
        dest="lists_in_input",
        type=optional_bool,
        nargs='?',
        const=True,
        default=True,
        help=command_help(
            "Assume that the input file may contain lists (disable when certain it does not). (default=%(default)s)."))

    # The remaining switches are identical for every command.
    for flag, dest_name, default_value, help_text in (
            ("--presorted", "sorted_input", False,
             "Indicate that the input has been presorted (or at least pregrouped) (default=%(default)s)."),
            ("--verify-sort", "verify_sort", True,
             "If the input has been presorted, verify its consistency (disable if only pregrouped). (default=%(default)s)."),
            ("--build-id", "build_id", False,
             "Build id values in an id column. (default=%(default)s)."),
    ):
        parser.add_argument(flag,
                            dest=dest_name,
                            help=help_text,
                            type=optional_bool,
                            nargs='?',
                            const=True,
                            default=default_value,
                            metavar="True|False")

    KgtkIdBuilderOptions.add_arguments(parser, expert=_expert)
    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
Ejemplo n.º 15
0
def add_arguments_extended(parser: KGTKArgumentParser,
                           parsed_shared_args: Namespace):
    """
    Register the graph-statistics options on the supplied parser.

    The options cover which properties to compute (PageRank, HITS), what to
    write to the primary output file, what to write to the log file, and the
    edge labels used for each computed property.

    Args:
        parser (argparse.ArgumentParser): the parser to receive the options.
        parsed_shared_args (Namespace): shared arguments; ``_expert`` and
            ``_mode`` are read here.
    """
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions, KgtkReaderMode
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    _expert: bool = parsed_shared_args._expert

    parser.add_input_file(positional=True, optional=False)
    parser.add_output_file()

    # Settings shared by every boolean switch below: a bare flag yields True
    # (const); each switch supplies its own default.
    bool_kwargs = dict(type=optional_bool,
                       nargs='?',
                       const=True,
                       metavar='True|False')

    parser.add_argument(
        '--undirected',
        dest="undirected",
        help='Is the graph undirected? If false, then the graph is '
        ' treated as (node1)->(node2).  If true, then the graph is '
        ' treated as (node1)<->(node2). '
        '\nAlso, HITS will not be computed on undirected graphs. '
        '\n(default=%(default)s)',
        default=False,
        **bool_kwargs)

    parser.add_argument(
        '--compute-pagerank',
        dest='compute_pagerank',
        help='Whether or not to compute the PageRank property. '
        '\nNote: --undirected improves the pagerank calculation. '
        'If you want both pagerank and in/out-degrees, you should make two runs. '
        '\n(default=%(default)s)',
        default=True,
        **bool_kwargs)

    parser.add_argument(
        '--compute-hits',
        dest='compute_hits',
        help='Whether or not to compute the HITS properties. '
        '\nNote: --undirected disables HITS calculation. (default=%(default)s)',
        default=True,
        **bool_kwargs)

    parser.add_argument(
        '--output-statistics-only',
        dest='output_statistics_only',
        # The closing parenthesis after the default was missing.
        help='If this option is set, write only the statistics edges to the primary output file. '
        'Else, write both the statistics and the original graph. (default=%(default)s)',
        default=False,
        **bool_kwargs)

    parser.add_argument(
        '--output-degrees',
        dest='output_degrees',
        help='Whether or not to write degree edges to the primary output file. (default=%(default)s)',
        default=True,
        **bool_kwargs)

    parser.add_argument(
        '--output-pagerank',
        dest='output_pagerank',
        help='Whether or not to write pagerank edges to the primary output file. (default=%(default)s)',
        default=True,
        **bool_kwargs)

    parser.add_argument(
        '--output-hits',
        dest='output_hits',
        help='Whether or not to write HITS edges to the primary output file. (default=%(default)s)',
        default=True,
        **bool_kwargs)

    parser.add_argument(
        '--log-file',
        action='store',
        type=str,
        dest='log_file',
        help='Summary file for the global statistics of the graph.',
        default='./summary.txt')

    parser.add_argument(
        '--log-top-relations',
        dest='log_top_relations',
        help='Whether or not to compute top relations and output them to the log file. (default=%(default)s)',
        default=True,
        **bool_kwargs)

    parser.add_argument(
        '--log-degrees-histogram',
        dest='log_degrees_histogram',
        help='Whether or not to compute degree distribution and output it to the log file. (default=%(default)s)',
        default=True,
        **bool_kwargs)

    parser.add_argument(
        '--log-top-pageranks',
        dest='log_top_pageranks',
        help='Whether or not to output PageRank centrality top-n to the log file. '
        '\n(default=%(default)s)',
        default=True,
        **bool_kwargs)

    parser.add_argument(
        '--log-top-hits',
        dest='log_top_hits',
        help='Whether or not to output the top-n HITS to the log file. (default=%(default)s)',
        default=True,
        **bool_kwargs)

    parser.add_argument(
        '--log-top-n',
        action='store',
        dest='top_n',
        default=5,
        type=int,
        help='Number of top centrality nodes to write to the log file. (default=%(default)d)')

    parser.add_argument(
        '--vertex-in-degree-property',
        action='store',
        dest='vertex_in_degree',
        default='vertex_in_degree',
        # The closing parenthesis after the default was missing.
        help='Label for edge: vertex in degree property. '
        '\nNote: If --undirected is True, then the in-degree will be 0. '
        '\n(default=%(default)s)')

    parser.add_argument(
        '--vertex-out-degree-property',
        action='store',
        dest='vertex_out_degree',
        default='vertex_out_degree',
        help='Label for edge: vertex out degree property. '
        '\nNote: if --undirected is True, then the out-degree will be the sum of '
        'the values that would have been calculated for in-degree and out-degree '
        ' if --undirected were False. '
        '\n(default=%(default)s)')

    parser.add_argument(
        '--page-rank-property',
        action='store',
        dest='vertex_pagerank',
        default='vertex_pagerank',
        help='Label for page rank property. (default=%(default)s)')

    parser.add_argument(
        '--vertex-hits-authority-property',
        action='store',
        dest='vertex_auth',
        default='vertex_auth',
        help='Label for edge: vertex hits authority. (default=%(default)s)')

    parser.add_argument(
        '--vertex-hits-hubs-property',
        action='store',
        dest='vertex_hubs',
        default='vertex_hubs',
        help='Label for edge: vertex hits hubs. (default=%(default)s)')

    KgtkReader.add_debug_arguments(parser, expert=_expert)
    # The default reader mode is looked up by name from the shared arguments.
    KgtkReaderOptions.add_arguments(
        parser,
        mode_options=True,
        default_mode=KgtkReaderMode[parsed_shared_args._mode],
        expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
Ejemplo n.º 16
0
def add_arguments_extended(parser: KGTKArgumentParser,
                           parsed_shared_args: Namespace):
    """
    Register the calculation options on the supplied parser: source columns,
    destination columns, the operation name, and optional values and format
    string.

    Args:
        parser (argparse.ArgumentParser): the parser to receive the options.
        parsed_shared_args (Namespace): shared arguments; only ``_expert``
            is read here.
    """
    # Modules are imported locally.
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    _expert: bool = parsed_shared_args._expert

    def h(msg: str) -> str:
        # Outside expert mode the help text is replaced by SUPPRESS, which
        # hides the option from the help message; the option itself is
        # still registered.
        return msg if _expert else SUPPRESS

    parser.add_input_file()
    parser.add_output_file()

    parser.add_argument("--output-format", dest="output_format", type=str,
                        help=h("The file format (default=kgtk)"))

    # Source columns.
    columns_help: str = (
        "The list of source column names, optionally containing '..' for column ranges "
        "and '...' for column names not explicitly mentioned.")
    parser.add_argument('-c', "--columns", dest="column_names", nargs='*',
                        metavar="COLUMN_NAME", help=columns_help)

    # Destination column(s) and the operation are both mandatory.
    parser.add_argument("--into", dest="into_column_names", required=True, nargs="+",
                        help="The name of the column to receive the result of the calculation.")
    parser.add_argument("--do", dest="operation", required=True, choices=OPERATIONS,
                        help="The name of the operation.")

    parser.add_argument("--values", dest="values", nargs='*', metavar="VALUES",
                        help="An optional list of values")

    parser.add_argument("--format", dest="format_string",
                        help="The format string for the calculation.")

    # Common KGTK option groups.
    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
Ejemplo n.º 17
0
def main():
    """
    Command-line test driver for the KGTK compact processor.

    Parses the command line, builds the ID builder, reader, and value option
    structures from the parsed arguments, optionally echoes the effective
    options to the error stream, and then runs a KgtkCompact over the input.
    """
    parser: ArgumentParser = ArgumentParser()

    def add_bool_option(flag: str, dest: str, help_text: str, default: bool) -> None:
        # Every boolean switch has the same shape: the value is optional and
        # a bare flag stores True (const=True).
        parser.add_argument(flag,
                            dest=dest,
                            help=help_text,
                            type=optional_bool,
                            nargs='?',
                            const=True,
                            default=default)

    # The input file is an optional positional argument.
    parser.add_argument(dest="input_file_path",
                        help="The KGTK file with the input data (default=%(default)s)",
                        type=Path,
                        nargs="?",
                        default="-")

    parser.add_argument("--columns",
                        dest="key_column_names",
                        help=("The key columns to identify records for compaction. "
                              "(default=id for node files, (node1, label, node2, id) for edge files)."),
                        nargs='+',
                        default=[])

    add_bool_option("--compact-id",
                    "compact_id",
                    "Indicate that the ID column in KGTK edge files should be compacted. "
                    "Normally, if the ID column exists, it is not compacted, "
                    "as there are use cases that need to maintain distinct lists of secondary edges for each ID value. (default=%(default)s).",
                    False)

    add_bool_option("--presorted",
                    "sorted_input",
                    "Indicate that the input has been presorted (or at least pregrouped). (default=%(default)s).",
                    False)

    add_bool_option("--verify-sort",
                    "verify_sort",
                    "If the input has been presorted, verify its consistency (disable if only pregrouped). (default=%(default)s).",
                    True)

    parser.add_argument("-o",
                        "--output-file",
                        dest="output_file_path",
                        help="The KGTK file to write (default=%(default)s).",
                        type=Path,
                        default="-")

    add_bool_option("--build-id",
                    "build_id",
                    "Build id values in an id column. (default=%(default)s).",
                    False)

    # Shared option groups contributed by the supporting classes.
    KgtkIdBuilderOptions.add_arguments(parser)
    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    # Diagnostics go to stdout only when explicitly requested.
    if args.errors_to_stdout:
        error_file: typing.TextIO = sys.stdout
    else:
        error_file = sys.stderr

    # Build the option structures.
    idbuilder_options: KgtkIdBuilderOptions = KgtkIdBuilderOptions.from_args(args)
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    # Show the final option structures for debugging and documentation.
    if args.show_options:
        option_lines: typing.List[str] = [
            "input: %s" % str(args.input_file_path),
            "--columns %s" % " ".join(args.key_column_names),
            "--compact-id=%s" % str(args.compact_id),
            "--presorted=%s" % str(args.sorted_input),
            "--verify-sort=%s" % str(args.verify_sort),
            "--output-file=%s" % str(args.output_file_path),
            "--build-id=%s" % str(args.build_id),
        ]
        for option_line in option_lines:
            print(option_line, file=error_file, flush=True)
        idbuilder_options.show(out=error_file)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    compactor: KgtkCompact = KgtkCompact(
        input_file_path=args.input_file_path,
        key_column_names=args.key_column_names,
        compact_id=args.compact_id,
        sorted_input=args.sorted_input,
        verify_sort=args.verify_sort,
        output_file_path=args.output_file_path,
        build_id=args.build_id,
        idbuilder_options=idbuilder_options,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose,
    )

    compactor.process()
Ejemplo n.º 18
0
def main():
    """
    Test the KGTK copy template.

    Parses the command line, builds the reader and value option structures,
    optionally echoes the effective options to the error stream, and then
    runs a KgtkSortBufferTest with the selected key generation procedure and
    grouping switches.
    """
    parser: ArgumentParser = ArgumentParser()

    parser.add_argument("-i",
                        "--input-file",
                        dest="input_file_path",
                        help="The KGTK input file. (default=%(default)s)",
                        type=Path,
                        default="-")

    parser.add_argument("-o",
                        "--output-file",
                        dest="output_file_path",
                        help="The KGTK output file. (default=%(default)s).",
                        type=Path,
                        default="-")

    parser.add_argument(
        "--keygen",
        dest="keygen",
        help="The KGTK key generation procedure. (default=%(default)s).",
        type=str,
        default="node1")

    parser.add_argument(
        "--group-sort",
        dest="group_sort",
        help="If true, use the grouped sort and buffer. (default=%(default)s).",
        type=optional_bool,
        nargs='?',
        const=True,
        default=False)

    parser.add_argument(
        "--group-iterate",
        dest="group_iterate",
        help="If true, use the grouped iteration. (default=%(default)s).",
        type=optional_bool,
        nargs='?',
        const=True,
        default=False)

    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    # Diagnostics go to stdout only when explicitly requested.
    error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr

    # Build the option structures.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    # Show the final option structures for debugging and documentation.
    if args.show_options:
        # This command takes a single input file (args.input_file_path).
        # The previous code referenced an undefined `input_file_paths` list
        # here, which raised NameError whenever the options were shown.
        print("--input-file=%s" % str(args.input_file_path),
              file=error_file,
              flush=True)
        print("--output-file=%s" % str(args.output_file_path),
              file=error_file,
              flush=True)
        print("--keygen=%s" % str(args.keygen), file=error_file, flush=True)
        print("--group-sort=%s" % str(args.group_sort),
              file=error_file,
              flush=True)
        print("--group-iterate=%s" % str(args.group_iterate),
              file=error_file,
              flush=True)

        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    ksbt: KgtkSortBufferTest = KgtkSortBufferTest(
        input_file_path=args.input_file_path,
        output_file_path=args.output_file_path,
        keygen=args.keygen,
        group_sort=args.group_sort,
        group_iterate=args.group_iterate,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose,
    )

    ksbt.process()
Ejemplo n.º 19
0
def add_arguments_extended(parser: KGTKArgumentParser,
                           parsed_shared_args: Namespace):
    """
    Register the command-line arguments for the embedding command.

    Args:
        parser: The KGTK argument parser that receives the options.
        parsed_shared_args: The already-parsed shared arguments.  Its
            `_expert` flag is forwarded to the reader option group, and its
            `_mode` value selects the default KgtkReaderMode.
    """
    from kgtk.utils.argparsehelpers import optional_bool

    _expert: bool = parsed_shared_args._expert

    parser.accept_shared_argument('_debug')

    # input file
    parser.add_input_file(positional=True)

    # model name
    all_models_names = ALL_EMBEDDING_MODELS_NAMES
    # NOTE(review): with nargs='+' a user-supplied value is a list, but this
    # default is a bare string, so the consumer sees a str when the option is
    # omitted.  Left unchanged because the consumer is not visible from here;
    # confirm it handles both forms.
    parser.add_argument('-m',
                        '--model',
                        action='store',
                        nargs='+',
                        dest='all_models_names',
                        default="bert-base-nli-cls-token",
                        choices=all_models_names,
                        help="the model to used for embedding")
    parser.add_argument(
        '-f',
        '--input-data-format',
        action='store',
        dest='data_format',
        choices=("test_format", "kgtk_format"),
        default="kgtk_format",
        help=
        "the input file format, could either be `test_format` or `kgtk_format`, default is `kgtk_format`",
    )
    parser.add_argument(
        '-p',
        '--property-labels-file',
        action='store',
        nargs='+',
        dest='property_labels_file_uri',
        help="the path to the property labels file.",
    )

    # This should probably default to "--label-properties" if not specified.
    parser.add_argument(
        '--property-labels-filter',
        action='store',
        nargs='+',
        dest='property_labels_filter',
        default=["label"],
        help=
        "The label columns value(s) of the edges to process in the property labels file. Default is [\"label\"]."
    )

    # properties (only valid for kgtk format input/output data)
    parser.add_argument(
        '--label-properties',
        action='store',
        nargs='+',
        dest='label_properties',
        default=["label"],
        help=
        """The names of the edges for label properties, Default is ["label"]. \n 
                        This argument is only valid for input in kgtk format."""
    )
    parser.add_argument(
        '--description-properties',
        action='store',
        nargs='+',
        dest='description_properties',
        default=["description"],
        help=
        """The names of the edges for description properties, Default is ["description"].\n 
                        This argument is only valid for input in kgtk format."""
    )
    parser.add_argument(
        '--isa-properties',
        action='store',
        nargs='+',
        dest='isa_properties',
        default=["P31"],
        help=
        """The names of the edges for `isa` properties, Default is ["P31"] (the `instance of` node in 
                        wikidata).""")
    # NOTE(review): the help text advertises a default of ["all"], but the
    # declared default is an empty list.  Confirm which one the consumer
    # expects before changing either.
    parser.add_argument(
        '--has-properties',
        action='store',
        nargs='+',
        dest='has_properties',
        default=[],
        help=
        """The names of the edges for `has` properties, Default is ["all"] (will automatically append all 
                        properties found for each node).""")
    parser.add_argument(
        '--property-value',
        action='store',
        nargs='+',
        dest='property_values',
        default=[],
        help=
        """For those edges found in `has` properties, the nodes specified here will display with 
                        corresponding edge(property) values. instead of edge name. """
    )
    parser.add_argument(
        '--property-value-file',
        action='store',
        dest='property_values_file',
        help=
        """Read the properties for --property-value option from an KGTK edge file"""
    )
    # The help text previously named `output_properties` (the dest) as the
    # default; the actual default value is `text_embedding`.
    parser.add_argument(
        '--output-property',
        action='store',
        dest='output_properties',
        default="text_embedding",
        help=
        """The output property name used to record the embedding. Default is `text_embedding`. \n
                        This argument is only valid for output in kgtk format."""
    )
    # output
    parser.add_argument(
        '--save-embedding-sentence',
        action='store_true',
        dest='save_embedding_sentence',
        help="if set, will also save the embedding sentences to output.")
    parser.add_argument(
        '-o',
        '--embedding-projector-metadata-path',
        action='store',
        dest='output_uri',
        default="",
        help=
        "output path for the metadata file, default will be current user's home directory"
    )
    parser.add_argument(
        '--output-data-format',
        action='store',
        dest='output_data_format',
        default="kgtk_format",
        choices=("tsv_format", "kgtk_format"),
        help=
        "output format, can either be `tsv_format` or `kgtk_format`. \nIf choose `tsv_format`, the output "
        "will be a tsv file, with each row contains only the vector representation of a node. Each "
        "dimension is separated by a tab")
    parser.add_argument(
        '--embedding-projector-metadata',
        action='store',
        nargs='+',
        dest='metadata_properties',
        default=[],
        help=
        """list of properties used to construct a metadata file for use in the Google Embedding Projector: 
                        http://projector.tensorflow.org. \n Default: the label and description of each node."""
    )

    # black list file
    parser.add_argument(
        '-b',
        '--black-list',
        nargs='+',
        action='store',
        dest='black_list_files',
        default=[],
        help=
        "the black list file, contains the Q nodes which should not consider as candidates."
    )

    # dimensional reduction relate
    parser.add_argument(
        "--dimensional-reduction",
        nargs='?',
        action='store',
        default="none",
        dest="dimensional_reduction",
        choices=("pca", "tsne", "none"),
        help=
        'whether to run dimensional reduction algorithm or not after the embedding, default is None (not '
        'run). ')
    parser.add_argument(
        "--dimension",
        type=int,
        nargs='?',
        action='store',
        default=2,
        dest="dimension_val",
        help=
        'How many dimension should remained after reductions, only valid when set to run dimensional '
        'reduction, default value is 2 ')

    parser.add_argument(
        "--parallel",
        nargs='?',
        action='store',
        default="1",
        dest="parallel_count",
        help="How many processes to be run in same time, default is 1.")
    # cache config
    # const=True makes a bare `--use-cache` enable the cache.  Without it,
    # nargs='?' stores None for a bare flag, which is falsy and silently
    # leaves the cache disabled.
    parser.add_argument(
        "--use-cache",
        type=optional_bool,
        nargs='?',
        action='store',
        const=True,
        default=False,
        dest="use_cache",
        help=
        "whether to use cache to get some embedding vectors quicker, default is False"
    )
    parser.add_argument(
        "--cache-host",
        nargs='?',
        action='store',
        default="dsbox01.isi.edu",
        dest="cache_host",
        help="cache host address, default is `dsbox01.isi.edu`")
    parser.add_argument("--cache-port",
                        nargs='?',
                        action='store',
                        default="6379",
                        dest="cache_port",
                        help="cache server port, default is `6379`")
    # query server
    parser.add_argument(
        "--query-server",
        nargs='?',
        action='store',
        default="",
        dest="query_server",
        help=
        "sparql query endpoint used for test_format input files, default is "
        "https://query.wikidata.org/sparql ")

    # Shared option groups; only the reader options honor the expert flag.
    KgtkReader.add_debug_arguments(parser, expert=False)
    KgtkReaderOptions.add_arguments(
        parser,
        mode_options=True,
        default_mode=KgtkReaderMode[parsed_shared_args._mode],
        expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=False)
Ejemplo n.º 20
0
def add_arguments_extended(parser: KGTKArgumentParser,
                           parsed_shared_args: Namespace):
    """
    Register this command's options on the supplied parser.

    Args:
        parser: The KGTK argument parser that receives the options.
        parsed_shared_args: The already-parsed shared arguments; its
            `_expert` flag is forwarded to the shared option groups.
    """
    _expert: bool = parsed_shared_args._expert

    # --- Files -----------------------------------------------------------
    parser.add_argument(
        "input_kgtk_file",
        nargs="?",
        type=Path,
        default="-",
        help="The KGTK file to filter. May be omitted or '-' for stdin (default=%(default)s).",
    )
    parser.add_argument(
        "-o",
        "--output-file",
        dest="output_kgtk_file",
        type=Path,
        default="-",
        help="The KGTK file to write (default=%(default)s).",
    )
    parser.add_argument(
        "--reject-file",
        dest="reject_kgtk_file",
        type=Path,
        default=None,
        help="The KGTK file into which to write rejected records (default=%(default)s).",
    )

    # --- Column and type selection ---------------------------------------
    parser.add_argument(
        "--column",
        dest="column_name",
        default=KgtkFormat.NODE2,
        help="The name of the column to explode. (default=%(default)s).",
    )
    parser.add_argument(
        "--prefix",
        dest="prefix",
        default=KgtkFormat.NODE2 + ";" + KgtkFormat.KGTK_NAMESPACE,
        help="The prefix for exploded column names. (default=%(default)s).",
    )
    parser.add_argument(
        "--types",
        dest="type_names",
        nargs='*',
        choices=KgtkFormat.DataType.choices(),
        default=KgtkFormat.DataType.choices(),
        help="The KGTK data types for which fields should be imploded. (default=%(default)s).",
    )
    parser.add_argument(
        "--without",
        dest="without_fields",
        nargs='*',
        choices=KgtkValueFields.OPTIONAL_DEFAULT_FIELD_NAMES,
        default=None,
        help="The KGTK fields to do without. (default=%(default)s).",
    )

    # --- Boolean switches ------------------------------------------------
    # Each entry is (flag, dest, help text, default).  All of them accept an
    # optional explicit value; a bare flag stores True (const=True).
    bool_options = (
        ("--overwrite", "overwrite_column",
         "Indicate that it is OK to overwrite an existing imploded column. (default=%(default)s).",
         True),
        ("--validate", "validate",
         "Validate imploded values. (default=%(default)s).",
         True),
        ("--escape-pipes", "escape_pipes",
         "When true, pipe characters (|) need to be escaped (\\|) per KGTK file format. (default=%(default)s).",
         False),
        ("--quantities-include-numbers", "quantities_include_numbers",
         "When true, numbers are acceptable quantities. (default=%(default)s).",
         True),
        ("--general-strings", "general_strings",
         "When true, strings may include language qualified strings. (default=%(default)s).",
         True),
        ("--remove-prefixed-columns", "remove_prefixed_columns",
         "When true, remove all columns beginning with the prefix from the output file. (default=%(default)s).",
         False),
        ("--ignore-unselected-types", "ignore_unselected_types",
         "When true, input records with valid but unselected data types will be passed through to output. (default=%(default)s).",
         True),
        ("--retain-unselected-types", "retain_unselected_types",
         "When true, input records with valid but unselected data types will be retain existing data on output. (default=%(default)s).",
         True),
        ("--build-id", "build_id",
         "Build id values in an id column. (default=%(default)s).",
         False),
        ("--show-data-types", "show_data_types",
         "Print the list of data types and exit. (default=%(default)s).",
         False),
    )
    for flag, dest_name, help_text, default_value in bool_options:
        parser.add_argument(flag,
                            dest=dest_name,
                            help=help_text,
                            type=optional_bool,
                            nargs='?',
                            const=True,
                            default=default_value)

    # --- Shared option groups --------------------------------------------
    KgtkIdBuilderOptions.add_arguments(parser)
    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
Ejemplo n.º 21
0
def main():
    """
    Command-line test driver for the KGTK ID builder.

    Parses the command line, builds the option structures, optionally echoes
    them to the error stream, and then opens a reader, an ID builder, and a
    writer (in that order) and lets the ID builder process the input.
    """
    parser: ArgumentParser = ArgumentParser()

    # Optional positional input file, then the output file option.
    parser.add_argument(dest="input_file_path",
                        type=Path,
                        nargs="?",
                        default="-",
                        help="The KGTK file with the input data (default=%(default)s)")
    parser.add_argument("-o",
                        "--output-file",
                        dest="output_file_path",
                        type=Path,
                        default="-",
                        help="The KGTK file to write (default=%(default)s).")

    # Shared option groups contributed by the supporting classes.
    KgtkIdBuilderOptions.add_arguments(parser)
    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    # Diagnostics go to stdout only when explicitly requested.
    if args.errors_to_stdout:
        error_file: typing.TextIO = sys.stdout
    else:
        error_file = sys.stderr

    # Build the option structures.
    idbuilder_options: KgtkIdBuilderOptions = KgtkIdBuilderOptions.from_args(args)
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    # Show the final option structures for debugging and documentation.
    if args.show_options:
        for template, path_value in (
            ("input: %s", args.input_file_path),
            ("--output-file=%s", args.output_file_path),
        ):
            print(template % str(path_value), file=error_file, flush=True)
        idbuilder_options.show(out=error_file)
        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    # Construction order matters: the reader supplies parameters used by the
    # ID builder, and the ID builder supplies the (possibly revised) column
    # names used by the writer.

    # Open the input file.
    reader: KgtkReader = KgtkReader.open(
        args.input_file_path,
        error_file=error_file,
        options=reader_options,
        value_options=value_options,
        verbose=args.verbose,
        very_verbose=args.very_verbose,
    )

    # Create the ID builder.
    id_builder: KgtkIdBuilder = KgtkIdBuilder.new(reader, idbuilder_options)

    # Open the output file.
    writer: KgtkWriter = KgtkWriter.open(
        id_builder.column_names,
        args.output_file_path,
        mode=reader.mode,
        require_all_columns=True,
        prohibit_extra_columns=True,
        fill_missing_columns=False,
        gzip_in_parallel=False,
        verbose=args.verbose,
        very_verbose=args.very_verbose,
    )

    # Process the input file, building IDs.
    id_builder.process(reader, writer)
Ejemplo n.º 22
0
def add_arguments_extended(parser: KGTKArgumentParser,
                           parsed_shared_args: Namespace):
    """
    Register this command's options on the supplied parser.

    Args:
        parser: The KGTK argument parser that receives the options.
        parsed_shared_args: The already-parsed shared arguments; its
            `_expert` flag decides whether expert-only help text is shown
            and is forwarded to the shared option groups.
    """
    from kgtk.iff.kgtkifexists import KgtkIfExists
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    _expert: bool = parsed_shared_args._expert

    def h(msg: str) -> str:
        # Expert-only help: the option is still registered and initialized,
        # but its help text is suppressed unless expert mode is on.
        return msg if _expert else SUPPRESS

    def add_bool_option(flag: str, dest: str, help_text: str) -> None:
        # Every boolean switch of this command defaults to False, accepts an
        # optional explicit value, and stores True for a bare flag.
        parser.add_argument(flag,
                            dest=dest,
                            metavar="True|False",
                            help=help_text,
                            type=optional_bool,
                            nargs='?',
                            const=True,
                            default=False)

    # --- Input files -------------------------------------------------------
    parser.add_input_file(positional=True)
    parser.add_input_file(who="The KGTK file to filter against.",
                          options=["--filter-on", "--filter-file"],
                          dest="filter_file",
                          metavar="FILTER_FILE")

    # --- Output files ------------------------------------------------------
    parser.add_output_file()
    # Each entry is (description, dest, option name, metavar); all of these
    # secondary outputs are optional.
    optional_outputs = (
        ("The KGTK reject file for records that fail the filter.",
         "reject_file", "--reject-file", "REJECT_FILE"),
        ("The KGTK file for filter records that matched at least one input record.",
         "matched_filter_file", "--matched-filter-file", "MATCHED_FILTER_FILE"),
        ("The KGTK file for filter records that did not match any input records.",
         "unmatched_filter_file", "--unmatched-filter-file", "UNMATCHED_FILTER_FILE"),
        (h("The KGTK file for joined output records (EXPERIMENTAL)."),
         "join_file", "--join-file", "JOIN_FILE"),
    )
    for description, dest_name, option_name, metavar_name in optional_outputs:
        parser.add_output_file(who=description,
                               dest=dest_name,
                               options=[option_name],
                               metavar=metavar_name,
                               optional=True)

    # --- Key columns -------------------------------------------------------
    parser.add_argument(
        "--input-keys",
        "--left-keys",
        dest="input_keys",
        nargs='*',
        help="The key columns in the file being filtered (default=None).")
    parser.add_argument(
        "--filter-keys",
        "--right-keys",
        dest="filter_keys",
        nargs='*',
        help="The key columns in the filter-on file (default=None).")

    # --- Processing strategy -----------------------------------------------
    add_bool_option(
        "--cache-input", "cache_input",
        "Cache the input file instead of the filter keys (default=%(default)s).")
    add_bool_option(
        "--preserve-order", "preserve_order",
        "Preserve record order when cacheing the input file. (default=%(default)s).")
    add_bool_option(
        "--presorted", "presorted",
        "When True, assume that the input and filter files are both presorted.  Use a merge-style algorithm that does not require caching either file. (default=%(default)s).")

    parser.add_argument(
        "--field-separator",
        dest="field_separator",
        default=KgtkIfExists.FIELD_SEPARATOR_DEFAULT,
        help=h("Separator for multifield keys (default=%(default)s)"))

    # --- Join options (help text is expert-only) ---------------------------
    add_bool_option(
        "--left-join", "left_join",
        h("When True, Include all input records in the join (EXPERIMENTAL). (default=%(default)s)."))
    add_bool_option(
        "--right-join", "right_join",
        h("When True, Include all filter records in the join (EXPERIMENTAL). (default=%(default)s)."))

    parser.add_argument(
        "--input-prefix",
        dest="input_prefix",
        help=h("Input file column name prefix for joins (EXPERIMENTAL). (default=%(default)s)"))
    parser.add_argument(
        "--filter-prefix",
        dest="filter_prefix",
        help=h("Filter file column name prefix for joins (EXPERIMENTAL). (default=%(default)s)"))

    add_bool_option(
        "--join-output", "join_output",
        h("When True, send the join records to the main output (EXPERIMENTAL). (default=%(default)s)."))
    add_bool_option(
        "--right-join-first", "right_first",
        h("When True, send the filter record to join output before the first matching input record. "
          " Otherwise, send the first matching input record, then the filter record, then othe rmatching input records. "
          "(EXPERIMENTAL). (default=%(default)s)."))

    # --- Shared option groups ----------------------------------------------
    KgtkReader.add_debug_arguments(parser, expert=_expert)
    # Reader options are registered three times: once without `who`, then
    # once each for who="input" and who="filter" with defaults=False.
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert)
    for reader_role in ("input", "filter"):
        KgtkReaderOptions.add_arguments(parser,
                                        mode_options=True,
                                        who=reader_role,
                                        expert=_expert,
                                        defaults=False)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
Ejemplo n.º 23
0
def add_arguments_extended(parser: KGTKArgumentParser,
                           parsed_shared_args: Namespace):
    """
    Declare the command line arguments for the reachable nodes command.

    Args:
        parser: the argument parser that receives the declarations.
        parsed_shared_args: the already-parsed shared arguments.  Only
            `_expert` is read here; it is forwarded as the `expert` flag
            to the reader and value option helpers.
    """
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    _expert: bool = parsed_shared_args._expert

    parser.add_input_file(positional=True,
                          who="The KGTK file to find reachable nodes in.")
    parser.add_output_file()

    # Root node selection: inline list, or a file plus a column selector.
    parser.add_argument(
        '--root',
        action='store',
        dest='root',
        type=str,
        nargs="*",
        help=
        'Set of root nodes to use, space- or comma-separated strings. (default=None)'
    )
    parser.add_argument(
        '--root-file',
        '--rootfile',
        action='store',
        dest='rootfile',
        help='Option to specify a file containing the set of root nodes',
        default=None)
    parser.add_argument(
        '--rootfilecolumn',
        action='store',
        type=str,
        dest='rootfilecolumn',
        help=
        'Specify the name or number of the root file column with the root nodes.  (default=node1 or its alias if edge file, id if node file)'
    )

    # Column overrides for the input graph.  In a KGTK edge file the
    # subject is node1, the predicate is label, and the object is node2.
    parser.add_argument(
        "--subj",
        action="store",
        type=str,
        dest="subject_column_name",
        help='Name of the subject column. (default: node1 or its alias)')
    parser.add_argument(
        "--obj",
        action="store",
        type=str,
        dest="object_column_name",
        help='Name of the object column. (default: node2 or its alias)')
    parser.add_argument(
        "--pred",
        action="store",
        type=str,
        dest="predicate_column_name",
        help='Name of the predicate column. (default: label or its alias)')

    # Properties to follow: inline list, or a file plus a column selector.
    parser.add_argument(
        "--prop",
        "--props",
        action="store",
        type=str,
        dest="props",
        nargs="*",
        help=
        'Properties to consider while finding reachable nodes, space- or comma-separated string. (default: all properties)',
        default=None)
    parser.add_argument(
        '--props-file',
        action='store',
        dest='props_file',
        help='Option to specify a file containing the set of properties',
        default=None)
    parser.add_argument(
        '--propsfilecolumn',
        action='store',
        type=str,
        dest='propsfilecolumn',
        default=None,
        help=
        'Specify the name or number of the props file column with the property names.  (default=node1 or its alias if edge file, id if node file)'
    )

    # Edge inversion: whole graph, or selected properties only.
    parser.add_argument(
        '--inverted',
        dest="inverted",
        help=
        "When True, and when --undirected is False, invert the source and target nodes in the graph. (default=%(default)s)",
        type=optional_bool,
        nargs='?',
        const=True,
        default=False,
        metavar="True|False")

    parser.add_argument(
        "--inverted-prop",
        "--inverted-props",
        action="store",
        type=str,
        dest="inverted_props",
        nargs="*",
        help=
        'Properties to invert, space- or comma-separated string. (default: no properties)',
        default=None)
    parser.add_argument(
        '--inverted-props-file',
        action='store',
        dest='inverted_props_file',
        help=
        'Option to specify a file containing the set of inverted properties',
        default=None)
    parser.add_argument(
        '--invertedpropsfilecolumn',
        action='store',
        type=str,
        dest='invertedpropsfilecolumn',
        default=None,
        help=
        'Specify the name or number of the inverted props file column with the property names.  (default=node1 or its alias if edge file, id if node file)'
    )

    # Undirected treatment: whole graph, or selected properties only.
    parser.add_argument(
        '--undirected',
        dest="undirected",
        help="When True, specify graph as undirected. (default=%(default)s)",
        type=optional_bool,
        nargs='?',
        const=True,
        default=False,
        metavar="True|False")

    parser.add_argument(
        "--undirected-prop",
        "--undirected-props",
        action="store",
        type=str,
        dest="undirected_props",
        nargs="*",
        help=
        'Properties to treat as undirected, space- or comma-separated string. (default: no properties)',
        default=None)
    parser.add_argument(
        '--undirected-props-file',
        action='store',
        dest='undirected_props_file',
        help=
        'Option to specify a file containing the set of undirected properties',
        default=None)
    parser.add_argument(
        '--undirectedpropsfilecolumn',
        action='store',
        type=str,
        dest='undirectedpropsfilecolumn',
        default=None,
        help=
        'Specify the name or number of the undirected props file column with the property names.  (default=node1 or its alias if edge file, id if node file)'
    )

    # Output shaping and search strategy.
    parser.add_argument(
        '--label',
        action='store',
        type=str,
        dest='label',
        help='The label for the reachable relationship. (default: %(default)s)',
        default="reachable")
    parser.add_argument(
        '--selflink',
        dest='selflink_bool',
        help=
        'When True, include a link from each output node to itself. (default=%(default)s)',
        type=optional_bool,
        nargs='?',
        const=True,
        default=False,
        metavar="True|False")

    parser.add_argument(
        '--show-properties',
        dest='show_properties',
        help='When True, show the graph properties. (default=%(default)s)',
        type=optional_bool,
        nargs='?',
        const=True,
        default=False,
        metavar="True|False")

    parser.add_argument(
        '--breadth-first',
        dest='breadth_first',
        help=
        'When True, search the graph breadth first.  When false, search depth first. (default=%(default)s)',
        type=optional_bool,
        nargs='?',
        const=True,
        default=False,
        metavar="True|False")

    parser.add_argument(
        '--depth-limit',
        dest='depth_limit',
        help=
        'An optional depth limit for breadth-first searches. (default=%(default)s)',
        type=int,
        default=None)

    # Debug options, then one set of reader options shared by all files
    # followed by a per-file set (declared with defaults=False) for each
    # of the input, root, and property files.
    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert)
    KgtkReaderOptions.add_arguments(parser,
                                    mode_options=True,
                                    who="input",
                                    expert=_expert,
                                    defaults=False)
    KgtkReaderOptions.add_arguments(parser,
                                    mode_options=True,
                                    who="root",
                                    expert=_expert,
                                    defaults=False)
    KgtkReaderOptions.add_arguments(parser,
                                    mode_options=True,
                                    who="props",
                                    expert=_expert,
                                    defaults=False)
    KgtkReaderOptions.add_arguments(parser,
                                    mode_options=True,
                                    who="undirected_props",
                                    expert=_expert,
                                    defaults=False)
    KgtkReaderOptions.add_arguments(parser,
                                    mode_options=True,
                                    who="inverted_props",
                                    expert=_expert,
                                    defaults=False)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
Ejemplo n.º 24
0
def add_arguments(parser: KGTKArgumentParser):
    """
    Declare the command line arguments for the graph embedding command.

    Boolean options are parsed with `optional_bool` rather than `bool`:
    argparse passes the raw string to the `type` callable, and
    `bool("False")` is True, so `type=bool` can never produce False from
    a non-empty value.

    Args:
        parser: the argument parser that receives the declarations.
    """
    # import modules locally
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    ### IO
    parser.add_argument("-i",
                        "--input-file",
                        dest="input_file_path",
                        help="The KGTK input file. (default=%(default)s)",
                        type=Path,
                        default="-")
    parser.add_argument("-o",
                        "--output-file",
                        dest="output_file_path",
                        help="The KGTK output file. (default=%(default)s).",
                        type=Path,
                        default="-")
    parser.add_argument('-l',
                        "--log",
                        dest="log_file_path",
                        help="Setting the log path [Default: None]",
                        type=Path,
                        default=None,
                        metavar="")
    parser.add_argument(
        '-T',
        '--temporary_directory',
        dest='temporary_directory',
        help="Specify the directory location to store temporary files",
        type=Path,
        default=Path('/tmp/'),
        metavar='')
    parser.add_argument(
        '-ot',
        '--output_format',
        dest='output_format',
        help=
        "Outputformat for embeddings [Default: w2v] Choice: kgtk | w2v | glove",
        default='w2v',
        metavar='')
    parser.add_argument(
        '-r',
        '--retain_temporary_data',
        dest='retain_temporary_data',
        help=
        "When operating on the graph, some temporary files will be generated, set True to retain these files ",
        type=optional_bool,
        nargs='?',
        const=True,
        default=True,
        metavar='True|False')
    ### Training parameters
    parser.add_argument(
        '-d',
        "--dimension",
        dest="dimension_num",
        help="Dimension of the real space the embedding live in [Default: 100]",
        type=int,
        default=100,
        metavar="")
    parser.add_argument(
        '-s',
        "--init_scale",
        dest="init_scale",
        help=
        "Generating the initial embedding with this standard deviation [Default: 0.001] "
        +
        "If no initial embeddings are provided, they are generated by sampling each dimension "
        +
        "from a centered normal distribution having this standard deviation.",
        type=float,
        default=0.001,
        metavar="")
    parser.add_argument(
        '-c',
        '--comparator',
        dest='comparator',
        help=
        "How the embeddings of the two sides of an edge (after having already "
        +
        "undergone some processing) are compared to each other to produce a score[Default: dot],"
        + "Choice: dot|cos|l2|squared_l2",
        default='dot',
        choices=['dot', 'cos', 'l2', 'squared_l2'],
        metavar='dot|cos|l2|squared_l2')
    parser.add_argument(
        '-op',
        '--operator',
        dest='operator',
        help=
        "The transformation to apply to the embedding of one of the sides of the edge "
        +
        "(typically the right-hand one) before comparing it with the other one. It reflects which model that embedding uses. "
        + "[Default:ComplEx]",
        #default will be setting to complex_diagonal later
        default='ComplEx',
        metavar='RESCAL|DistMult|ComplEx|TransE')
    parser.add_argument(
        '-e',
        '--num_epochs',
        dest='num_epochs',
        help=
        "The number of times the training loop iterates over all the edges.[Default:100]",
        type=int,
        default=100,
        metavar='')
    parser.add_argument(
        '-b',
        '--bias',
        dest='bias',
        help=
        "Whether use the bias choice [Default: False],If enabled, withhold the first "
        +
        "dimension of the embeddings from the comparator and instead use it as a bias, adding "
        +
        "back to the score. Makes sense for logistic and softmax loss functions. ",
        type=optional_bool,
        nargs='?',
        const=True,
        default=False,
        metavar='True|False')
    parser.add_argument(
        '-w',
        '--workers',
        dest='workers',
        help=
        "The number of worker processes for training. If not given, set to CPU count.",
        type=int,
        default=None,
        metavar='')
    parser.add_argument('-bs',
                        '--batch_size',
                        dest='batch_size',
                        help="The number of edges per batch.[Default:1000]",
                        type=int,
                        default=1000,
                        metavar='')
    parser.add_argument(
        '-lf',
        '--loss_fn',
        dest='loss_fn',
        help=
        "How the scores of positive edges and their corresponding negatives " +
        "are evaluated.[Default: ranking], Choice: ranking|logistic|softmax",
        # default will be setting to ranking later
        default=None,
        choices=['ranking', 'logistic', 'softmax', None],
        metavar='ranking|logistic|softmax')
    parser.add_argument(
        '-lr',
        '--learning_rate',
        dest='learning_rate',
        help="The learning rate for the optimizer.[Default: 0.1]",
        # default will be setting to 0.1 later
        type=float,
        default=None,
        metavar='')
    parser.add_argument(
        '-ef',
        '--eval_fraction',
        dest='eval_fraction',
        help=
        "The fraction of edges withheld from training and used to track evaluation "
        + "metrics during training. [Default:0.0 training all edges ]",
        type=float,
        default=0.0,
        metavar='')
    # NOTE: the misspelled option name and dest ("relaitons") are part of
    # the existing interface and are kept so current callers keep working.
    parser.add_argument(
        '-dr',
        '--dynamic_relaitons',
        dest='dynamic_relaitons',
        help="Whether use dynamic relations (when graphs with a " +
        "large number of relations) [Default: True]",
        type=optional_bool,
        nargs='?',
        const=True,
        default=True,
        metavar='True|False')
    parser.add_argument(
        '-ge',
        '--global_emb',
        dest='global_emb',
        help=
        "Whether use global embedding, if enabled, add to each embedding a vector that is common "
        "to all the entities of a certain type. This vector is learned during training.[Default: False] ",
        type=optional_bool,
        nargs='?',
        const=True,
        default=False,
        metavar='True|False')
    ### kgtk format
    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=True)
    KgtkValueOptions.add_arguments(parser)
Ejemplo n.º 25
0
def add_arguments_extended(parser: KGTKArgumentParser,
                           parsed_shared_args: Namespace):
    """
    Declare the command line arguments for the unique-values command.

    Args:
        parser: the argument parser that receives the declarations.
        parsed_shared_args: the already-parsed shared arguments; `_expert`
            is read from it.
    """
    from kgtk.kgtkformat import KgtkFormat
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions
    from kgtk.join.unique import Unique
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    _expert: bool = parsed_shared_args._expert

    def h(msg: str) -> str:
        # Help text for expert-only options: shown only when `_expert` is
        # set, otherwise replaced by SUPPRESS.  The option itself is still
        # declared either way.
        return msg if _expert else SUPPRESS

    parser.add_input_file(positional=True)
    parser.add_output_file()

    declare = parser.add_argument

    declare("--column", dest="column_name",
            help="The column to count unique values (default=node2 or its alias).")

    declare("--empty", dest="empty_value", default="",
            help="A value to substitute for empty values (default=%(default)s).")

    declare("--label", dest="label_value", default="count",
            help="The output file label column value (default=%(default)s).")

    # TODO: use an enum
    declare("--format", dest="output_format",
            default=Unique.DEFAULT_FORMAT,
            choices=Unique.OUTPUT_FORMATS,
            help="The output file format and mode (default=%(default)s).")

    declare("--prefix", dest="prefix", default="",
            help="The value prefix (default=%(default)s).")

    # Optional record selection: a column name plus its accepted values.
    declare("--where", dest="where_column_name", default=None,
            help="The name of a column for a record selection test. (default=%(default)s).")

    declare("--in", dest="where_values", nargs="+", default=None,
            help="The list of values for a record selection test. (default=%(default)s).")

    declare("--presorted", dest="presorted", metavar="True|False",
            type=optional_bool, nargs='?', const=True, default=False,
            help="When True, the input file is presorted. (default=%(default)s).")

    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser, mode_options=True, expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
Ejemplo n.º 26
0
def main():
    """
    Command line driver for exercising the KGTK Unique processor.

    Parses the arguments, optionally echoes the effective options to the
    error stream, then builds a `Unique` instance and runs it.
    """
    parser: ArgumentParser = ArgumentParser()
    declare = parser.add_argument

    declare(dest="input_file_path", type=Path, nargs="?",
            help="The KGTK file with the input data")

    declare("--column", dest="column_name", required=True,
            help="The column to count unique values (required).")

    declare("--empty", dest="empty_value", default="",
            help="A value to substitute for empty values (default=%(default)s).")

    declare("-o", "--output-file", dest="output_file_path",
            type=Path, default="-",
            help="The KGTK file to write (default=%(default)s).")

    declare("--label", dest="label_value", default="count",
            help="The output file label column value (default=%(default)s).")

    # TODO: use an enum
    declare("--format", dest="output_format",
            default="edge", choices=["edge", "node"],
            help="The output file format and mode (default=%(default)s).")

    declare("--prefix", dest="prefix", default="",
            help="The value prefix (default=%(default)s).")

    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    # Diagnostics go to stdout only when explicitly requested.
    error_file: typing.TextIO
    if args.errors_to_stdout:
        error_file = sys.stdout
    else:
        error_file = sys.stderr

    # Build the option structures.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    # Show the final option structures for debugging and documentation.
    if args.show_options:
        if args.input_file_path is None:
            input_name = "-"
        else:
            input_name = str(args.input_file_path)
        print("input: %s" % input_name, file=error_file)

        echoed = (
            ("--column", args.column_name),
            ("--empty", args.empty_value),
            ("--output-file", str(args.output_file_path)),
            ("--label", args.label_value),
            ("--format", args.output_format),
            ("--prefix", args.prefix),
        )
        for option, value in echoed:
            print("%s=%s" % (option, value), file=error_file)

        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    uniq: Unique = Unique(
        input_file_path=args.input_file_path,
        column_name=args.column_name,
        output_file_path=args.output_file_path,
        empty_value=args.empty_value,
        label_value=args.label_value,
        output_format=args.output_format,
        prefix=args.prefix,
        reader_options=reader_options,
        value_options=value_options,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose,
    )
    uniq.process()
Ejemplo n.º 27
0
def add_arguments_extended(parser: KGTKArgumentParser,
                           parsed_shared_args: Namespace):
    """
    Declare the command line arguments for the column removal command.

    Args:
        parser: the argument parser that receives the declarations.
        parsed_shared_args: the already-parsed shared arguments.  Only
            `_expert` is read here; it is forwarded as the `expert` flag
            to the reader and value option helpers.
    """
    _expert: bool = parsed_shared_args._expert

    parser.add_input_file(positional=True)
    parser.add_output_file()

    declare = parser.add_argument

    # Keyword arguments shared by the boolean switches below.
    switch = dict(type=optional_bool, nargs='?', const=True)

    declare('-c', "--columns",
            action="store", type=str, dest="columns",
            nargs='+', required=True,
            help="Columns to remove as a comma- or space-separated strings, e.g., id,docid or id docid")

    # How the values given to --columns are split and cleaned up.
    declare("--split-on-commas", dest="split_on_commas", default=True,
            help="Parse the list of columns, splitting on commas. (default=%(default)s).",
            **switch)

    declare("--split-on-spaces", dest="split_on_spaces", default=False,
            help="Parse the list of columns, splitting on spaces. (default=%(default)s).",
            **switch)

    declare("--strip-spaces", dest="strip_spaces", default=True,
            help="Parse the list of columns, stripping whitespace. (default=%(default)s).",
            **switch)

    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser,
                                    mode_options=True,
                                    default_mode=KgtkReaderMode.NONE,
                                    expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
Ejemplo n.º 28
0
def add_arguments_extended(parser: KGTKArgumentParser,
                           parsed_shared_args: Namespace):
    """
    Declare the command line arguments for the graph paths command.

    Args:
        parser: the argument parser that receives the declarations.
        parsed_shared_args: the already-parsed shared arguments.  `_expert`
            is forwarded as the `expert` flag to the option helpers, and
            `_mode` names the default `KgtkReaderMode`.
    """
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions, KgtkReaderMode
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    _expert: bool = parsed_shared_args._expert

    parser.add_input_file(positional=True)
    parser.add_output_file()
    # A second input file listing the (start, end) node pairs.
    parser.add_input_file(who="KGTK file with path start and end nodes.",
                          options=["--path-file", "--path_file"],
                          dest="path_file",
                          metavar="PATH_FILE",
                          optional=False)

    parser.add_argument(
        '--statistics-only',
        dest='statistics_only',
        help=
        'If this flag is set, output only the statistics edges. Else, append the statistics to the original graph. (default=%(default)s)',
        type=optional_bool,
        nargs='?',
        const=True,
        default=False,
        metavar="True|False")

    parser.add_argument(
        '--undirected',
        dest="undirected",
        help="Is the graph undirected or not? (default=%(default)s)",
        type=optional_bool,
        nargs='?',
        const=True,
        default=False,
        metavar="True|False")

    parser.add_argument('--max-hops',
                        '--max_hops',
                        action="store",
                        type=int,
                        dest="max_hops",
                        help="Maximum number of hops allowed.")

    # Column overrides for the path file.
    parser.add_argument(
        "--path-source",
        action="store",
        type=str,
        dest="source_column_name",
        help=
        'Name of the source column in the path file. (default: node1 or its alias)'
    )
    parser.add_argument(
        "--path-target",
        action="store",
        type=str,
        dest="target_column_name",
        help=
        'Name of the target column in the path file. (default: node2 or its alias)'
    )

    parser.add_argument(
        "--shortest-path",
        dest="shortest_path",
        metavar="True|False",
        help="When true, shortest paths are returned. (default=%(default)s).",
        type=optional_bool,
        nargs='?',
        const=True,
        default=False)

    # Debug options, then one set of reader options shared by both files
    # followed by a per-file set (declared with defaults=False) for the
    # input file and the path file.
    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(
        parser,
        mode_options=True,
        default_mode=KgtkReaderMode[parsed_shared_args._mode],
        expert=_expert)
    KgtkReaderOptions.add_arguments(parser,
                                    mode_options=True,
                                    who="input",
                                    expert=_expert,
                                    defaults=False)
    KgtkReaderOptions.add_arguments(parser,
                                    mode_options=True,
                                    who="path",
                                    expert=_expert,
                                    defaults=False)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
Ejemplo n.º 29
0
def add_arguments_extended(parser: KGTKArgumentParser,
                           parsed_shared_args: Namespace):
    """
    Declare the command line arguments for the record/column count command.

    Args:
        parser: the argument parser that receives the declarations.
        parsed_shared_args: the already-parsed shared arguments.  `_expert`,
            `_command`, and `_mode` are read from it.
    """
    # Modules needed only while declaring arguments.
    from kgtk.io.kgtkreader import KgtkReader, KgtkReaderOptions, KgtkReaderMode
    from kgtk.utils.argparsehelpers import optional_bool
    from kgtk.value.kgtkvalueoptions import KgtkValueOptions

    # Shared arguments: the expert flag and the command name that was used.
    _expert: bool = parsed_shared_args._expert
    _command: str = parsed_shared_args._command

    def h(msg: str) -> str:
        # Help text for expert-only options: shown only when `_expert` is
        # set, otherwise replaced by SUPPRESS.  The option itself is still
        # declared either way.
        return msg if _expert else SUPPRESS

    # Primary input and output files, with no special features.
    parser.add_input_file()
    parser.add_output_file()

    # The default for --lines depends on which command name was used.
    if _command == WC_COMMAND:
        count_records_default = DEFAULT_COUNT_RECORDS_WC
    else:
        count_records_default = DEFAULT_COUNT_RECORDS

    lines_help = (
        "If true, count records and print a single number to stdout. "
        "If false, count non-empty values per column and produce a simple KGTK output file. (default=%(default)s)."
    )
    parser.add_argument('-l', '--lines',
                        dest="count_records",
                        metavar="True/False",
                        help=lines_help,
                        type=optional_bool,
                        nargs='?',
                        const=True,
                        default=count_records_default)

    # Expert option: its help text is suppressed unless `_expert` is set.
    count_property_help = h(
        "The property used for column count output edges. (default=%(default)s)."
    )
    parser.add_argument("--count-property",
                        dest="count_property",
                        help=count_property_help,
                        default=DEFAULT_COUNT_PROPERTY)

    # Standard debugging arguments plus the KgtkReader and KgtkValue options.
    default_mode = KgtkReaderMode[parsed_shared_args._mode]
    KgtkReader.add_debug_arguments(parser, expert=_expert)
    KgtkReaderOptions.add_arguments(parser,
                                    mode_options=True,
                                    default_mode=default_mode,
                                    expert=_expert)
    KgtkValueOptions.add_arguments(parser, expert=_expert)
Ejemplo n.º 30
0
def main():
    """
    Command-line test driver for KgtkUnreifyValues.

    Builds an argument parser with the input/output file options defined
    here plus the options contributed by KgtkUnreifyValues, KgtkReader,
    KgtkReaderOptions and KgtkValueOptions, parses the command line,
    optionally echoes the effective options, and then constructs a
    KgtkUnreifyValues instance and calls its process() method.

    The option echo is written to stdout when `args.errors_to_stdout` is
    set, otherwise to stderr.
    """
    parser: ArgumentParser = ArgumentParser()

    parser.add_argument("-i",
                        "--input-file",
                        dest="input_file_path",
                        help="The KGTK input file. (default=%(default)s)",
                        type=Path,
                        default="-")

    parser.add_argument("-o",
                        "--output-file",
                        dest="output_file_path",
                        help="The KGTK output file. (default=%(default)s).",
                        type=Path,
                        default="-")

    parser.add_argument(
        "--reified-file",
        dest="reified_file_path",
        help=
        "A KGTK output file that will contain only the reified values. (default=%(default)s).",
        type=Path,
        default=None)

    parser.add_argument(
        "--unreified-file",
        dest="unreified_file_path",
        help=
        "A KGTK output file that will contain only the unreified values. (default=%(default)s).",
        type=Path,
        default=None)

    parser.add_argument(
        "--uninvolved-file",
        dest="uninvolved_file_path",
        help=
        "A KGTK output file that will contain only the uninvolved input records. (default=%(default)s).",
        type=Path,
        default=None)

    parser.add_argument("--output-format",
                        dest="output_format",
                        help="The file format (default=kgtk)",
                        type=str,
                        choices=KgtkWriter.OUTPUT_FORMAT_CHOICES)

    # The remaining attributes read from `args` below (trigger/label values,
    # allow_* flags, show_options, errors_to_stdout, verbose, ...) are not
    # defined in this function; presumably they are registered by these
    # helper calls -- verify against the helper classes.
    KgtkUnreifyValues.add_arguments(parser)
    KgtkReader.add_debug_arguments(parser)
    KgtkReaderOptions.add_arguments(parser, mode_options=False, expert=True)
    KgtkValueOptions.add_arguments(parser)

    args: Namespace = parser.parse_args()

    error_file: typing.TextIO = sys.stdout if args.errors_to_stdout else sys.stderr

    # Build the option structures.
    reader_options: KgtkReaderOptions = KgtkReaderOptions.from_args(args)
    value_options: KgtkValueOptions = KgtkValueOptions.from_args(args)

    # Show the final option structures for debugging and documentation.
    if args.show_options:

        def show(text: str) -> None:
            # Every echoed option goes to the selected stream and is flushed
            # immediately.
            print(text, file=error_file, flush=True)

        # This used to reference an undefined `input_file_paths` list, which
        # raised NameError whenever the options were shown.  Only a single
        # input file is accepted here, so report that one.
        show("--input-file=%s" % str(args.input_file_path))
        show("--output-file=%s" % str(args.output_file_path))

        # These options are echoed only when they have a value.
        optional_settings = (
            ("--reified-file", args.reified_file_path),
            ("--unreified-file", args.unreified_file_path),
            ("--uninvolved-file", args.uninvolved_file_path),
            ("--output-format", args.output_format),
            ("--trigger-label", args.trigger_label_value),
            ("--trigger-node2", args.trigger_node2_value),
            ("--value-label", args.value_label_value),
            ("--old-label", args.old_label_value),
            ("--new-label", args.new_label_value),
        )
        for option_name, option_value in optional_settings:
            if option_value is not None:
                show("%s=%s" % (option_name, str(option_value)))

        show("--allow-multiple-values=%s" % str(args.allow_multiple_values))
        show("--allow-extra-columns=%s" % str(args.allow_extra_columns))

        reader_options.show(out=error_file)
        value_options.show(out=error_file)

    kuv: KgtkUnreifyValues = KgtkUnreifyValues(
        input_file_path=args.input_file_path,
        output_file_path=args.output_file_path,
        reified_file_path=args.reified_file_path,
        unreified_file_path=args.unreified_file_path,
        uninvolved_file_path=args.uninvolved_file_path,
        trigger_label_value=args.trigger_label_value,
        trigger_node2_value=args.trigger_node2_value,
        value_label_value=args.value_label_value,
        old_label_value=args.old_label_value,
        new_label_value=args.new_label_value,
        allow_multiple_values=args.allow_multiple_values,
        allow_extra_columns=args.allow_extra_columns,
        reader_options=reader_options,
        value_options=value_options,
        output_format=args.output_format,
        error_file=error_file,
        verbose=args.verbose,
        very_verbose=args.very_verbose,
    )

    kuv.process()