Example 1
    def test_argparse_no_patch(self):
        parser = argparse.ArgumentParser()
        method = parser.parse_args
        slogging.add_logging_args(parser, False)  # patch=False: parse_args is left untouched
        self.assertEqual(method, parser.parse_args)

        parser = argparse.ArgumentParser()
        method = parser.parse_args
        slogging.add_logging_args(parser)  # patch defaults to True, so parse_args is replaced
        self.assertNotEqual(method, parser.parse_args)
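
The two assertions above hinge on add_logging_args monkey-patching parse_args when its patch argument is left at the default of True. A minimal sketch of what that means in practice, assuming modelforge's slogging module and inferring the --log-level flag spelling from the log_level destination checked in Example 3:

import argparse

from modelforge import slogging

parser = argparse.ArgumentParser()
slogging.add_logging_args(parser)  # patch defaults to True
# The patched parse_args configures logging via slogging.setup as a side
# effect of parsing, so no explicit setup call is needed afterwards.
args = parser.parse_args(["--log-level", "DEBUG"])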
Example 2
import argparse
from typing import Union

from modelforge import slogging


def run_slogging_main():
    parser = argparse.ArgumentParser()
    slogging.add_logging_args(parser)

    def my_setup(level: Union[str, int], structured: bool, config_path: str):
        print(level)
        print(structured)
        print(config_path)

    slogging.setup = my_setup
    parser.parse_args()
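
Since the snippet replaces slogging.setup before parsing, the patched parse_args forwards the parsed logging options straight to my_setup. If this code lived in main.py, an invocation could look like the following (the flag spelling is again an assumption derived from the log_level destination):

python main.py --log-level INFO

my_setup would then print the level together with whatever defaults slogging assigns to the structured and config options.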
Example 3
    def test_argparse_erase_args(self):
        log_args = {"log_level", "log_structured", "log_config"}
        parser = argparse.ArgumentParser()
        slogging.add_logging_args(parser, erase_args=True)
        with patch.object(sys, "argv"):
            with patch("modelforge.slogging.setup"):
                def my_setup(*args):
                    pass
                slogging.setup = my_setup
                args = parser.parse_args()
                self.assertEqual(len(log_args.intersection(vars(args))), 0)
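
With erase_args=True the logging options are stripped from the resulting namespace once consumed, which the test verifies via the empty intersection. That is convenient when vars(args) is forwarded wholesale to a command handler. A hypothetical sketch of the pattern (run_command is made up for illustration):

parser = argparse.ArgumentParser()
slogging.add_logging_args(parser, erase_args=True)
parser.add_argument("--jobs", type=int, default=1)
args = parser.parse_args(["--jobs", "4"])
# Only the command's own options remain in the namespace, so it can be
# expanded into keyword arguments without filtering out log_* entries:
run_command(**vars(args))  # hypothetical handler receiving jobs=4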
Example 4
def create_parser() -> ArgumentParser:
    """
    Create a parser for the lookout.style.format utility.

    :return: an ArgumentParser with a handler defined in the handler attribute.
    """
    # Deferred imports to speed up loading __init__
    from lookout.style.format.benchmarks.compare_quality_reports import \
        compare_quality_reports_entry
    from lookout.style.format.benchmarks.evaluate_smoke import evaluate_smoke_entry
    from lookout.style.format.benchmarks.generate_smoke import generate_smoke_entry
    from lookout.style.format.benchmarks.quality_report import generate_quality_report
    from lookout.style.format.benchmarks.general_report import print_reports
    from lookout.style.format.benchmarks.quality_report_noisy import quality_report_noisy
    from lookout.style.format.benchmarks.expected_vnodes_number import \
        calc_expected_vnodes_number_entry

    parser = ArgumentParser(
        formatter_class=ArgumentDefaultsHelpFormatterNoNone)

    # General options
    slogging.add_logging_args(parser)

    subparsers = parser.add_subparsers(help="Commands")

    def add_parser(name, help):
        return subparsers.add_parser(
            name,
            help=help,
            formatter_class=ArgumentDefaultsHelpFormatterNoNone)

    # Evaluation
    eval_parser = add_parser("eval",
                             "Evaluate trained model on given dataset.")
    eval_parser.set_defaults(handler=print_reports)
    add_input_pattern_arg(eval_parser)
    add_bblfsh_arg(eval_parser)
    add_model_args(eval_parser)
    eval_parser.add_argument(
        "-n",
        "--n-files",
        default=0,
        type=int,
        help="How many files with most mispredictions to show. "
        "If n <= 0 show all.")

    # Generate quality report for the given data
    quality_report_parser = add_parser(
        "quality-report", "Generate quality report on a given data.")
    quality_report_parser.set_defaults(handler=generate_quality_report)
    quality_report_parser.add_argument(
        "-i",
        "--input",
        required=True,
        help=
        "CSV file with repositories to build the report from. Should contain url, to "
        "and from columns.")
    quality_report_parser.add_argument("-o",
                                       "--output",
                                       required=True,
                                       help="Directory where to save results.")
    quality_report_parser.add_argument(
        "-f",
        "--force",
        default=False,
        action="store_true",
        help="Force to overwrite results stored in output directory if True. \
                 Stored results will be used if False.")
    quality_report_parser.add_argument("-b",
                                       "--bblfsh",
                                       help="Bblfsh address to use.")
    quality_report_parser.add_argument(
        "--config",
        type=json.loads,
        default="{}",
        help="Config for analyzer in json format.")
    quality_report_parser.add_argument(
        "--database",
        default=None,
        help="sqlite3 database path to store the models."
        "Temporary file is used if not set.")
    quality_report_parser.add_argument(
        "--fs",
        default=None,
        help="Model repository file system root. "
        "Temporary directory is used if not set.")

    # Generate the quality report based on the artificial noisy dataset
    quality_report_noisy_parser = add_parser(
        "quality-report-noisy", "Quality report on the "
        "artificial noisy dataset")
    quality_report_noisy_parser.set_defaults(handler=quality_report_noisy)
    add_bblfsh_arg(quality_report_noisy_parser)
    add_rules_thresholds(quality_report_noisy_parser)
    quality_report_noisy_parser.add_argument(
        "-l",
        "--language",
        default="javascript",
        help="Programming language to use.")
    quality_report_noisy_parser.add_argument(
        "--repos",
        type=str,
        help=
        "List of URLs or paths to the repositories to analyze, as strings separated "
        "by newlines.")
    quality_report_noisy_parser.add_argument(
        "--precision-threshold",
        type=float,
        default=0.95,
        help="Precision threshold tolerated for the model.")
    quality_report_noisy_parser.add_argument(
        "-o",
        "--dir-output",
        required=True,
        type=str,
        help=
        "Path to the output directory where to store the quality report and the "
        "precision-recall curve.")

    # Compare two quality reports summaries
    compare_quality_parser = add_parser(
        "compare-quality",
        "Creates a file with the differences in quality metrics between two reports."
    )
    compare_quality_parser.set_defaults(handler=compare_quality_reports_entry)
    compare_quality_parser.add_argument(
        "--base",
        type=str,
        required=True,
        help=
        "Baseline report. Usually the latest report from ./report/ directory.")
    compare_quality_parser.add_argument(
        "--new",
        type=str,
        required=True,
        help=
        "New report. Usually a report generated for master or any local change "
        "you want to validate.")
    compare_quality_parser.add_argument(
        "-o",
        "--output",
        type=str,
        required=True,
        help="Path to the file to save result or - to print to stdout.")

    # Generate dataset of different styles in code for smoke testing.
    gen_smoke_parser = add_parser(
        "gen-smoke-dataset", "Generate dataset with different styles. "
        "Helps to check the basic system functionality. "
        "Only JavaScript code is supported now.")
    gen_smoke_parser.set_defaults(handler=generate_smoke_entry)
    gen_smoke_parser.add_argument(
        "inputpath",
        type=str,
        help="Path to the tar.xz archive containing initial repositories.")
    gen_smoke_parser.add_argument(
        "outputpath",
        type=str,
        help=
        "Path to the directory where the generated dataset should be stored.")
    gen_smoke_parser.add_argument("--force",
                                  default=False,
                                  action="store_true",
                                  help="Override output directory if exists.")

    # Evaluate on different styles dataset
    eval_smoke_parser = add_parser(
        "eval-smoke-dataset", "Evaluate on the dataset with different styles.")
    eval_smoke_parser.set_defaults(handler=evaluate_smoke_entry)
    eval_smoke_parser.add_argument(
        "inputpath",
        type=str,
        help="Path to the directory where the generated dataset is stored. "
        "To generate a dataset run gen-smoke-dataset command.")
    eval_smoke_parser.add_argument(
        "reportdir",
        type=str,
        help="Path for report performance output directory.")
    eval_smoke_parser.add_argument("--bblfsh",
                                   help="Babelfish server's address.")
    eval_smoke_parser.add_argument("--config",
                                   type=json.loads,
                                   default="{}",
                                   help="JSON config for FormatAnalyzer.")
    eval_smoke_parser.add_argument(
        "--database",
        type=str,
        default=None,
        help="Path to the sqlite3 database with trained models metadata. "
        "Enables reusing previously trained models.")

    rule_parser = add_parser("rule", "Print rule description by its hash.")
    rule_parser.set_defaults(handler=dump_rule_entry)
    rule_parser.add_argument("model", help="Path to the model file.")
    rule_parser.add_argument("hash", help="Hash of the rule (8 chars).")

    # FIXME(zurk): remove when https://github.com/src-d/style-analyzer/issues/557 is resolved
    calc_expected_vnodes = add_parser(
        "calc-expected-vnodes-number",
        "Write the CSV file with expected numbers of virtual nodes "
        "extracted from repositories. Required for quality report "
        "generation. It is a workaround for "
        "https://github.com/src-d/style-analyzer/issues/557. "
        "Docker service is required to be running.")
    calc_expected_vnodes.set_defaults(
        handler=calc_expected_vnodes_number_entry)
    calc_expected_vnodes.add_argument(
        "-i",
        "--input",
        required=True,
        help="CSV file with repositories for quality report."
        "Should contain url, to and from columns.")
    calc_expected_vnodes.add_argument("-o",
                                      "--output",
                                      required=True,
                                      help="Path to a output csv file.")
    calc_expected_vnodes.add_argument(
        "-r",
        "--runs",
        default=3,
        help="Number of repeats to ensure the result correctness.")

    return parser
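
Each subcommand registers its callable through set_defaults(handler=...), so a thin entry point only has to dispatch on that attribute. A sketch, reusing the print_usage fallback that Example 5 applies when no command was given:

def main():
    parser = create_parser()
    args = parser.parse_args()
    if not hasattr(args, "handler"):
        parser.print_usage()
        return 1
    return args.handler(args)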
Example 5
def parse_args() -> argparse.Namespace:
    """
    Create the cmdline argument parser.
    """
    parser = argparse.ArgumentParser(
        formatter_class=ArgumentDefaultsHelpFormatterNoNone)
    slogging.add_logging_args(parser, patch=True, erase_args=False)

    # Create and construct subparsers
    subparsers = parser.add_subparsers(help="Commands", dest="command")

    def add_parser(name, help_message):
        return subparsers.add_parser(
            name,
            help=help_message,
            formatter_class=ArgumentDefaultsHelpFormatterNoNone)

    # --------------------------------------------------------------------------------------------

    clickhouse2deps_parser = add_parser(
        "clickhouse2deps", "Extract dependencies from a ClickHouse DB.")
    clickhouse2deps_parser.set_defaults(handler=clickhouse2deps)
    clickhouse2deps_parser.add_argument(
        "-o",
        "--output-path",
        type=Path,
        help=
        "Output path to the resulting ASDF model with the extracted dependencies.",
    )
    clickhouse2deps_parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        help=
        "Boolean indicating whether to overwrite the existing ASDF model specified by "
        "-o/--output-path.",
    )
    clickhouse2deps_parser.add_argument("--user",
                                        default="default",
                                        help="Username for the DB.")
    clickhouse2deps_parser.add_argument("--password",
                                        default="",
                                        help="Password for the DB.")
    clickhouse2deps_parser.add_argument("--host",
                                        default="0.0.0.0",
                                        help="Host for the DB.")
    clickhouse2deps_parser.add_argument("--port",
                                        default=9000,
                                        type=int,
                                        help="Port for the DB.")
    clickhouse2deps_parser.add_argument("--database",
                                        default="default",
                                        help="Database name for the DB.")
    clickhouse2deps_parser.add_argument("--table",
                                        default="uasts",
                                        help="Table name for the DB.")
    clickhouse2deps_parser.add_argument(
        "--langs",
        nargs="+",
        default=CLICKHOUSE_LANGS,
        choices=CLICKHOUSE_LANGS,
        help="Languages to consider while extracting dependencies.",
    )
    # --------------------------------------------------------------------------------------------

    collect_stdlibs_parser = add_parser(
        "collect-stdlibs",
        "Collect the lists of standard libraries for each language Babelfish can parse.",
    )
    collect_stdlibs_parser.set_defaults(handler=collect_stdlibs)
    collect_stdlibs_parser.add_argument(
        "-o",
        "--output-path",
        type=Path,
        help=
        "Output path to the resulting ASDF model with the extracted standard libraries.",
    )
    collect_stdlibs_parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        help=
        "Boolean indicating whether to overwrite the existing ASDF model specified by "
        "-o/--output-path.",
    )
    args = parser.parse_args()
    if not hasattr(args, "handler"):
        args.handler = lambda _: parser.print_usage()  # noqa: E731
    return args
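
Here the usage fallback is baked into parse_args itself, so a caller can dispatch unconditionally; a minimal sketch:

def main():
    args = parse_args()
    args.handler(args)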
Example 6
def create_parser() -> ArgumentParser:
    """
    Create a parser for the lookout.style.typos utility.

    :return: an ArgumentParser with a handler defined in the handler attribute.
    """
    parser = ArgumentParser(
        formatter_class=ArgumentDefaultsHelpFormatterNoNone)

    # General options
    slogging.add_logging_args(parser)
    subparsers = parser.add_subparsers(help="Commands")

    def add_parser(name, help):
        return subparsers.add_parser(
            name,
            help=help,
            formatter_class=ArgumentDefaultsHelpFormatterNoNone)

    # Prepare raw data for corrector
    prepare_parser = add_parser("prepare-data",
                                "Prepare raw dataset for corrector training.")
    prepare_parser.set_defaults(handler=prepare_data)
    add_config_arg(prepare_parser)

    # Train a new fasttext model on the given data
    fasttext_parser = add_parser(
        "train-fasttext", "Train fasttext model on the given dataset"
        "of code identifiers.")
    fasttext_parser.set_defaults(handler=cli_train_fasttext)
    add_data_path_arg(fasttext_parser)
    add_config_arg(fasttext_parser)

    # Create train and test datasets with artificial typos
    datasets_parser = add_parser(
        "get-datasets", "Create the train and the test datasets of typos.")
    datasets_parser.set_defaults(handler=cli_get_datasets)
    add_data_path_arg(datasets_parser)
    add_config_arg(datasets_parser)

    # Create, train and evaluate new corrector model
    train_parser = add_parser(
        "train", "Create and train TyposCorrector model on the given data.")
    train_parser.set_defaults(handler=cli_train_corrector)
    train_parser.add_argument(
        "--train",
        required=False,
        type=str,
        default=DEFAULT_CORRECTOR_CONFIG["datasets"]["train_path"],
        help=
        ".csv dump of a Dataframe with columns Columns.Split and Columns.Frequency.",
    )
    train_parser.add_argument(
        "--test",
        required=False,
        type=str,
        default=DEFAULT_CORRECTOR_CONFIG["datasets"]["test_path"],
        help=
        ".csv dump of a Dataframe with columns Columns.Split and Columns.Frequency.",
    )
    train_parser.add_argument(
        "-v",
        "--vocabulary-path",
        required=False,
        type=str,
        default=os.path.join(
            DEFAULT_CORRECTOR_CONFIG["preparation"]["data_dir"],
            DEFAULT_CORRECTOR_CONFIG["preparation"]["vocabulary_filename"]),
        help="Path to a .csv file with vocabulary.",
    )
    train_parser.add_argument(
        "-f",
        "--frequencies-path",
        required=False,
        type=str,
        default=os.path.join(
            DEFAULT_CORRECTOR_CONFIG["preparation"]["data_dir"],
            DEFAULT_CORRECTOR_CONFIG["preparation"]["frequencies_filename"]),
        help="Path to a .csv file with tokens' frequencies.",
    )
    train_parser.add_argument(
        "-e",
        "--fasttext-path",
        required=False,
        type=str,
        default=DEFAULT_CORRECTOR_CONFIG["fasttext"]["path"],
        help="Path to a FastText model's dump (.bin).",
    )
    add_config_arg(train_parser)
    add_corrector_path_arg(train_parser)

    ########################################
    # One command to rule them all
    ########################################
    train_from_scratch_parser = add_parser(
        "train-from-scratch",
        "Create and train TyposCorrector model on the given data.")
    train_from_scratch_parser.set_defaults(handler=train_from_scratch)
    add_config_arg(train_from_scratch_parser)

    # Report for Typo Commits Dataset
    typo_commits_report_parser = add_parser(
        "typo-commits-report", "Generate report for Typo Commits Dataset.")
    typo_commits_report_parser.set_defaults(
        handler=generate_typos_report_entry)
    add_config_arg(typo_commits_report_parser)
    typo_commits_report_parser.add_argument(
        "-i",
        "--dataset",
        required=True,
        help=
        "CSV file with commits containing typos. Must contain wrong_id, correct_id, "
        "file, line, commit_fix, repo and commit_typo columns. An xz-compressed "
        "file may also be specified.")
    typo_commits_report_parser.add_argument(
        "-o",
        "--output",
        required=True,
        help="Directory where to save results.")
    typo_commits_report_parser.add_argument("-b",
                                            "--bblfsh",
                                            help="Bblfsh address to use.")
    typo_commits_report_parser.add_argument(
        "--database",
        default=None,
        help="sqlite3 database path to store the models."
        "Temporary file is used if not set.")
    typo_commits_report_parser.add_argument(
        "--fs",
        default=None,
        help="Model repository file system root. "
        "Temporary directory is used if not set.")
    typo_commits_report_parser.add_argument(
        "--repos-cache",
        default=None,
        required=False,
        help=
        "Directory where to download the repositories from the dataset. It is strongly "
        "recommended to set this parameter if there are more than 20 repositories "
        "in the dataset. A temporary directory is used if not set.")

    return parser
Example 7
def create_parser() -> ArgumentParser:
    """
    Create a parser for the lookout.style.typos utility.

    :return: an ArgumentParser with a handler defined in the handler attribute.
    """
    parser = ArgumentParser(
        formatter_class=ArgumentDefaultsHelpFormatterNoNone)

    # General options
    slogging.add_logging_args(parser)
    subparsers = parser.add_subparsers(help="Commands")

    def add_parser(name, help):
        return subparsers.add_parser(
            name,
            help=help,
            formatter_class=ArgumentDefaultsHelpFormatterNoNone)

    # Prepare raw data for corrector
    prepare_parser = add_parser("prepare-data",
                                "Prepare raw dataset for corrector training.")
    prepare_parser.set_defaults(handler=prepare_data)
    add_config_arg(prepare_parser)

    # Train a new fasttext model on the given data
    fasttext_parser = add_parser(
        "train-fasttext", "Train fasttext model on the given dataset"
        "of code identifiers.")
    fasttext_parser.set_defaults(handler=cli_train_fasttext)
    add_data_path_arg(fasttext_parser)
    add_config_arg(fasttext_parser)

    # Create train and test datasets with artificial typos
    datasets_parser = add_parser(
        "get-datasets", "Create the train and the test datasets of typos.")
    datasets_parser.set_defaults(handler=cli_get_datasets)
    add_data_path_arg(datasets_parser)
    add_config_arg(datasets_parser)

    # Create, train and evaluate new corrector model
    train_parser = add_parser(
        "train", "Create and train TyposCorrector model on the given data.")
    train_parser.set_defaults(handler=cli_train_corrector)
    train_parser.add_argument(
        "--train",
        required=False,
        type=str,
        default=DEFAULT_CONFIG["datasets"]["train_path"],
        help=
        ".csv dump of a Dataframe with columns Columns.Split and Columns.Frequency.",
    )
    train_parser.add_argument(
        "--test",
        required=False,
        type=str,
        default=DEFAULT_CONFIG["datasets"]["test_path"],
        help=
        ".csv dump of a Dataframe with columns Columns.Split and Columns.Frequency.",
    )
    train_parser.add_argument(
        "-v",
        "--vocabulary-path",
        required=False,
        type=str,
        default=os.path.join(
            DEFAULT_CONFIG["preparation"]["data_dir"],
            DEFAULT_CONFIG["preparation"]["vocabulary_filename"]),
        help="Path to a .csv file with vocabulary.",
    )
    train_parser.add_argument(
        "-f",
        "--frequencies-path",
        required=False,
        type=str,
        default=os.path.join(
            DEFAULT_CONFIG["preparation"]["data_dir"],
            DEFAULT_CONFIG["preparation"]["frequencies_filename"]),
        help="Path to a .csv file with tokens' frequencies.",
    )
    train_parser.add_argument(
        "-e",
        "--fasttext-path",
        required=False,
        type=str,
        default=DEFAULT_CONFIG["fasttext"]["path"],
        help="Path to a FastText model's dump (.bin).",
    )
    add_corrector_path_arg(train_parser)

    ########################################
    # One command to rule them all
    ########################################
    train_from_scratch_parser = add_parser(
        "train-from-scratch",
        "Create and train TyposCorrector model on the given data.")
    train_from_scratch_parser.set_defaults(handler=train_from_scratch)
    add_config_arg(train_from_scratch_parser)

    return parser