Example #1
def main():
    # tf.config.list_physical_devices("GPU")
    # Hide all GPUs from TensorFlow so the run is forced onto the CPU:
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    # Example data override: --data-params-override '{"max_nodes_per_batch": 5000}'

    parser = get_train_cli_arg_parser()
    args, potential_hyperdrive_args = parser.parse_known_args()

    hyperdrive_hyperparameter_overrides = None
    if args.hyperdrive_arg_parse and len(potential_hyperdrive_args) % 2 == 0:
        # Allow parsing params specified as "--key value" as well as "key value"
        hyperdrive_hyperparameter_overrides = {
            param.replace("--", ""): value
            for param, value in zip(potential_hyperdrive_args[::2],
                                    potential_hyperdrive_args[1::2])
        }
    elif len(potential_hyperdrive_args) > 0:
        # Reparse to throw standard error message:
        args = parser.parse_args()

    # Make TF less noisy:
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "1"
    tf.get_logger().setLevel("ERROR")

    run_and_debug(
        lambda: run_train_from_args(args, hyperdrive_hyperparameter_overrides),
        args.debug)
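
The examples on this page all hand their entry point to run_and_debug from dpu_utils.utils. As a rough sketch of what the helper is assumed to do, based only on how it is called in these snippets (run the callable; on failure, open a post-mortem pdb session when the debug flag is set), and not the library's actual implementation:

import pdb
import traceback
from typing import Callable


def run_and_debug_sketch(func: Callable[[], None], enable_debugging: bool) -> None:
    # Run the zero-argument callable; if it raises and debugging is enabled,
    # print the traceback and drop into an interactive post-mortem session.
    try:
        func()
    except Exception:
        if enable_debugging:
            traceback.print_exc()
            pdb.post_mortem()
        else:
            raise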
Example #2
    def fit(self, train_data: List[Dict[str, Any]],
            validation_data: List[Dict[str, Any]]):
        """ Fit the model to the train and validation data"""
        os.environ["TF_CPP_MIN_LOG_LEVEL"] = "1"
        tf.get_logger().setLevel("ERROR")

        run_and_debug(lambda: self._run_train(train_data, validation_data),
                      self.debug)
Example #3
File: test.py  Project: mikechen66/gnn-tf2
def run():
    import argparse

    parser = argparse.ArgumentParser(description="Test a GNN model.")
    parser.add_argument(
        "TRAINED_MODEL",
        type=str,
        help="File to load model from (determines model architecture & task).",
    )
    parser.add_argument("DATA_PATH",
                        type=str,
                        help="Directory containing the task data.")
    parser.add_argument(
        "--model-params-override",
        dest="model_param_override",
        type=str,
        help="JSON dictionary overriding model hyperparameter values.",
    )
    parser.add_argument(
        "--data-params-override",
        dest="data_param_override",
        type=str,
        help="JSON dictionary overriding data hyperparameter values.",
    )
    parser.add_argument(
        "--azure-info",
        dest="azure_info",
        type=str,
        default="azure_auth.json",
        help="Azure authentication information file (JSON).",
    )
    parser.add_argument(
        "--quiet",
        dest="quiet",
        action="store_true",
        help="Generate less output during testing.",
    )
    parser.add_argument("--debug",
                        dest="debug",
                        action="store_true",
                        help="Enable debug routines")
    args = parser.parse_args()

    # Shut up tensorflow:
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
    tf.get_logger().setLevel("ERROR")
    import warnings
    warnings.simplefilter("ignore")

    run_and_debug(lambda: run_from_args(args), args.debug)
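
The --model-params-override and --data-params-override flags above take JSON strings. The snippet does not show how they are consumed; presumably they are decoded with json.loads and merged into the default hyperparameters. A minimal sketch of that assumed step (parse_param_overrides is a hypothetical helper, not part of the project):

import json
from typing import Any, Dict, Optional


def parse_param_overrides(override_str: Optional[str]) -> Dict[str, Any]:
    # Hypothetical helper: turn e.g. '{"hidden_dim": 128}' into a dict,
    # returning an empty dict when no override string was passed.
    if not override_str:
        return {}
    return json.loads(override_str)


# e.g. parse_param_overrides(args.model_param_override)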
Example #4
def run():
    parser = get_train_cli_arg_parser()
    args, potential_hyperdrive_args = parser.parse_known_args()

    hyperdrive_hyperparameter_overrides = None
    if args.hyperdrive_arg_parse and len(potential_hyperdrive_args) % 2 == 0:
        # Allow parsing params specified as "--key value" as well as "key value"
        hyperdrive_hyperparameter_overrides = {
            param.replace("--", ""): value
            for param, value in zip(potential_hyperdrive_args[::2], potential_hyperdrive_args[1::2])
        }
    elif len(potential_hyperdrive_args) > 0:
        # Reparse to throw standard error message:
        args = parser.parse_args()

    # Make TF less noisy:
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "1"
    tf.get_logger().setLevel("ERROR")

    run_and_debug(
        lambda: run_train_from_args(args, hyperdrive_hyperparameter_overrides), args.debug
    )
Example #5
        optimizer_creator=create_optimizer_,
        clip_gradient_norm=1,
        target_validation_metric="Accuracy",
        target_validation_metric_higher_is_better=True,
        enable_amp=arguments["--amp"],
    )
    if nn is not None:
        trainer.neural_module = nn

    world_size = int(arguments["--world-size"])
    if world_size == -1:
        world_size = torch.cuda.device_count()

    trainer.distributed_train(
        world_size,
        training_data,
        validation_data,
        initialize_metadata=initialize_metadata,
        parallelize=not arguments["--sequential-run"],
        validate_on_start=True,
        shuffle_training_data=True,
        patience=10,
        worker_init=worker_init,
        start_epoch_idx=current_epoch_idx,
    )


if __name__ == "__main__":
    args = docopt(__doc__)
    run_and_debug(lambda: run(args), args.get("--debug", False))
Example #6
File: test.py  Project: yyht/tf-gnn-samples
    --quiet                         Show less output.
    --debug                         Turn on debugger.
"""
from typing import Optional

from docopt import docopt
from dpu_utils.utils import run_and_debug, RichPath

from utils.model_utils import restore


def test(model_path: str, test_data_path: Optional[RichPath], result_dir: str, quiet: bool = False):
    model = restore(model_path, result_dir)
    test_data_path = test_data_path or RichPath.create(model.task.default_data_path())
    model.test(test_data_path)


def run(args):
    azure_info_path = args.get('--azure-info', None)
    model_path = args['STORED_MODEL_PATH']
    test_data_path = args.get('DATA_PATH')
    if test_data_path is not None:
        test_data_path = RichPath.create(test_data_path, azure_info_path)
    result_dir = args.get('--result_dir', 'trained_models')
    test(model_path, test_data_path, result_dir, quiet=args.get('--quiet'))


if __name__ == "__main__":
    args = docopt(__doc__)
    run_and_debug(lambda: run(args), enable_debugging=args['--debug'])
Example #7
def run(arguments) -> None:
    print("Loading data ...")
    model = LanguageModel.restore(arguments["TRAINED_MODEL"])
    print(f"  Loaded trained model from {arguments['TRAINED_MODEL']}.")

    test_data = load_data_from_dir(
        model.vocab,
        length=model.hyperparameters["max_seq_length"],
        data_dir=arguments["TEST_DATA_DIR"],
        max_num_files=arguments.get("--max-num-files"),
    )
    print(
        f"  Loaded {test_data.shape[0]} test samples from {arguments['TEST_DATA_DIR']}."
    )

    test_loss, test_acc = model.run_one_epoch(
        get_minibatch_iterator(
            test_data,
            model.hyperparameters["batch_size"],
            is_training=False,
            drop_remainder=False,
        ),
        training=False,
    )
    print(f"Test:  Loss {test_loss:.4f}, Acc {test_acc:.3f}")


if __name__ == "__main__":
    args = docopt(__doc__)
    run_and_debug(lambda: run(args), args["--debug"])
Example #8
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))


def run_indexing(model_path: RichPath, index_data_path: RichPath):
    test_hyper_overrides = {
        'run_id': 'indexing',
        "dropout_keep_rate": 1.0,
    }

    data_chunks = index_data_path.get_filtered_files_in_dir('*.jsonl.gz')

    # Restore model
    model = model_restore_helper.restore(model_path,
                                         is_train=False,
                                         hyper_overrides=test_hyper_overrides)

    model.create_index(data_chunks)
    model.save(model_path)


def run(arguments):
    azure_info_path = arguments.get('--azure-info', None)
    data_folder = RichPath.create(arguments['DATA_PATH'], azure_info_path)
    model_path = RichPath.create(arguments['MODEL_PATH'])
    run_indexing(model_path, data_folder)


if __name__ == '__main__':
    args = docopt(__doc__)
    run_and_debug(lambda: run(args), args.get('--debug', False))
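
Example #8 leans on dpu_utils' RichPath abstraction, which gives local directories and Azure blob paths a common interface. A small usage sketch assuming a local directory (remote paths are created the same way, with the azure_info_path argument shown in run() above); the directory name is illustrative:

from dpu_utils.utils import RichPath

# Local paths need no Azure credentials; the same calls work for blob storage
# when RichPath.create is given an azure_info_path, as in run() above.
data_dir = RichPath.create("/tmp/graph-data")
chunks = data_dir.get_filtered_files_in_dir("*.jsonl.gz")
print(f"Found {len(chunks)} data chunks")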
Example #9
                                   save_folder,
                                   hyperparameters,
                                   azure_info_path,
                                   run_name,
                                   arguments['--quiet'],
                                   max_files_per_dir=max_files_per_dir,
                                   parallelize=not (arguments['--sequential']),
                                   random_sample_size=int(
                                       arguments['--random-sample-size']),
                                   random_data_dir_name=random_data_dir)

        if num_random_samples == 1:
            wandb.config['best_model_path'] = str(model_path)
            wandb.save(str(model_path.to_local_path()))

        if no_eval:
            continue
        # only limit files in test run if `--testrun` flag is passed by user.
        elif testrun:
            compute_evaluation_metrics(model_path, arguments, azure_info_path,
                                       valid_data_dirs, test_data_dirs,
                                       max_files_per_dir)
        else:
            compute_evaluation_metrics(model_path, arguments, azure_info_path,
                                       valid_data_dirs, test_data_dirs)


if __name__ == '__main__':
    args = docopt(__doc__)
    run_and_debug(lambda: run(args), args['--debug'])
Example #10
#!/usr/bin/env python
"""
Usage:
    trainandtest.py [options] TRAIN_DATA_PATH VALID_DATA_PATH TEST_DATA_PATH MODEL_FILENAME

Options:
    --aml                      Run this in Azure ML
    --amp                      Enable automatic mixed precision.
    --azure-info=<path>        Azure authentication information file (JSON). Used to load data from Azure storage.
    --max-num-epochs=<epochs>  The maximum number of epochs to run training for. [default: 100]
    --minibatch-size=<size>    The minibatch size. [default: 300]
    --restore-path=<path>      The path to previous model file for starting from previous checkpoint.
    --sequential-run           Do not parallelize data loading. Makes debugging easier.
    --quiet                    Do not show progress bar.
    -h --help                  Show this screen.
    --debug                    Enable debug routines. [default: False]
"""

from docopt import docopt
from dpu_utils.utils import run_and_debug

from ptgnn.implementations.graph2seq import test, train

if __name__ == "__main__":
    args = docopt(__doc__)
    run_and_debug(lambda: train.run(args), args.get("--debug", False))
    run_and_debug(lambda: test.run(args), args.get("--debug", False))
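
Example #10 also shows the docopt pattern shared by most of these scripts: the module docstring doubles as the command-line specification, and docopt(__doc__) returns a plain dict keyed by flag and positional names. A tiny self-contained sketch of the same pattern (script and argument names here are illustrative):

"""
Usage:
    demo.py [options] DATA_PATH

Options:
    --debug    Enable debug routines. [default: False]
"""
from docopt import docopt
from dpu_utils.utils import run_and_debug


def run(args):
    print("Data path:", args["DATA_PATH"], "debug enabled:", args["--debug"])


if __name__ == "__main__":
    args = docopt(__doc__)
    run_and_debug(lambda: run(args), args["--debug"])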
Example #11
                        )
            f.write('}\n')  # graph

    # endregion


def test_on_self():
    from glob import iglob
    import os
    lattice = TypeLatticeGenerator('../../metadata/typingRules.json')
    for fname in iglob('./testfiles/**/*.py', recursive=True):
        # for fname in iglob('/mnt/c/Users/t-mialla/Source/Repos/**/*.py', recursive=True):
        if os.path.isdir(fname): continue
        print(fname)

        with open(fname) as f:
            try:
                b = AstGraphGenerator(f.read(), lattice)
                b.build()
                b.to_dot(
                    'test.dot'
                )  #, draw_only_edge_types={EdgeType.NEXT_USE, EdgeType.OCCURRENCE_OF})
                import pdb
                pdb.set_trace()
            except SyntaxError:
                pass


if __name__ == '__main__':
    run_and_debug(test_on_self, True)
        print(f"  Loaded {valid_data[0].shape[0]} validation samples.")

        valid_data_iterator = get_minibatch_iterator(
            valid_data,
            batch_size=10,
            is_training=False,
            drop_remainder=True
        )

        aux = next(valid_data_iterator)
        good_predictions, bad_predictions, logits, targets = model.compute_loss_and_acc(
            model.compute_logits(tf.stack([*aux], axis=2), training=False),
            target_token_seq=aux[1],
            qualitative_results=True)

        good_predictions_counter = Counter(good_predictions.numpy())
        bad_predictions_counter = Counter(bad_predictions.numpy())

        print(f"GOOD predictions of model {args['--model']}")
        for node_id, count in good_predictions_counter.most_common(15):
            print("%5d   |   %15s" % (count, vocab_actions.get_name_for_id(node_id)))

        print(f"\nBAD predictions of model {args['--model']}")
        for node_id, count in bad_predictions_counter.most_common(15):
            print("%5d   |   %15s" % (count, vocab_actions.get_name_for_id(node_id)))


if __name__ == "__main__":
    args = docopt(__doc__)
    run_and_debug(lambda: evaluate(args), args["--debug"])
Example #13
        number_variables += len(data['supernodes'])
        number_annotations += sum(
            1 for supernode in data['supernodes'].values()
            if supernode['annotation'] not in {None, 'None', 'Nothing', 'Any'})
        annotation_table.update((supernode['annotation']
                                 for supernode in data['supernodes'].values()
                                 if supernode['annotation'] not in
                                 {None, 'None', 'Nothing', 'Any'}))
    with open(output_path.to_local_path().path, "a") as f:
        f.write("Statistics for file: " + graph_path.to_local_path().path +
                "\n")
        f.write("Number of graphs: %d\n" % (number_graphs))
        f.write("Number of variables: %d\n" % (number_variables))
        f.write("Number of annotations: %d\n" % (number_annotations))
        f.write("Number of different annotations: %d\n" %
                (len(list(annotation_table))))
        f.write("\nFrequency distribution of annotations type:\n\n")
        for annotation, value in annotation_table.most_common():
            f.write("%s\t%d\n" % (annotation, value))


def run(arguments):
    graph_path = RichPath.create(arguments['GRAPH_PATH'])
    output_path = RichPath.create(arguments['OUTPUT_PATH'])
    run_stats(graph_path, output_path)


if __name__ == "__main__":
    args = docopt(__doc__)
    run_and_debug(lambda: run(args), True)
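
annotation_table in Example #13 is used like a collections.Counter: update it with an iterable of annotation strings and read the frequency table back with most_common. A minimal self-contained sketch of that counting pattern, with made-up supernode data:

from collections import Counter

supernodes = {
    "v1": {"annotation": "int"},
    "v2": {"annotation": "None"},  # filtered out, as in run_stats above
    "v3": {"annotation": "int"},
}

annotation_table = Counter()
annotation_table.update(
    sn["annotation"] for sn in supernodes.values()
    if sn["annotation"] not in {None, "None", "Nothing", "Any"}
)
for annotation, count in annotation_table.most_common():
    print("%s\t%d" % (annotation, count))  # e.g. "int  2"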