def main():
    # tf.config.list_physical_devices("GPU")
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    # --data-params-override '{"max_nodes_per_batch": 5000}'
    parser = get_train_cli_arg_parser()
    args, potential_hyperdrive_args = parser.parse_known_args()
    # os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

    hyperdrive_hyperparameter_overrides = None
    if args.hyperdrive_arg_parse and len(potential_hyperdrive_args) % 2 == 0:
        # Allow parsing params specified as "--key value" as well as "key value"
        hyperdrive_hyperparameter_overrides = {
            param.replace("--", ""): value
            for param, value in zip(potential_hyperdrive_args[::2], potential_hyperdrive_args[1::2])
        }
    elif len(potential_hyperdrive_args) > 0:
        # Reparse to throw standard error message:
        args = parser.parse_args()

    # Make TF less noisy:
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "1"
    tf.get_logger().setLevel("ERROR")

    run_and_debug(
        lambda: run_train_from_args(args, hyperdrive_hyperparameter_overrides), args.debug
    )

def fit(self, train_data: List[Dict[str, Any]], validation_data: List[Dict[str, Any]]):
    """Fit the model to the train and validation data."""
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "1"
    tf.get_logger().setLevel("ERROR")
    run_and_debug(lambda: self._run_train(train_data, validation_data), self.debug)

def run():
    import argparse

    parser = argparse.ArgumentParser(description="Test a GNN model.")
    parser.add_argument(
        "TRAINED_MODEL",
        type=str,
        help="File to load model from (determines model architecture & task).",
    )
    parser.add_argument("DATA_PATH", type=str, help="Directory containing the task data.")
    parser.add_argument(
        "--model-params-override",
        dest="model_param_override",
        type=str,
        help="JSON dictionary overriding model hyperparameter values.",
    )
    parser.add_argument(
        "--data-params-override",
        dest="data_param_override",
        type=str,
        help="JSON dictionary overriding data hyperparameter values.",
    )
    parser.add_argument(
        "--azure-info",
        dest="azure_info",
        type=str,
        default="azure_auth.json",
        help="Azure authentication information file (JSON).",
    )
    parser.add_argument(
        "--quiet",
        dest="quiet",
        action="store_true",
        help="Generate less output during testing.",
    )
    parser.add_argument("--debug", dest="debug", action="store_true", help="Enable debug routines")
    args = parser.parse_args()

    # Shut up tensorflow:
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
    tf.get_logger().setLevel("ERROR")
    import warnings

    warnings.simplefilter("ignore")

    run_and_debug(lambda: run_from_args(args), args.debug)

def run():
    parser = get_train_cli_arg_parser()
    args, potential_hyperdrive_args = parser.parse_known_args()
    hyperdrive_hyperparameter_overrides = None
    if args.hyperdrive_arg_parse and len(potential_hyperdrive_args) % 2 == 0:
        # Allow parsing params specified as "--key value" as well as "key value"
        hyperdrive_hyperparameter_overrides = {
            param.replace("--", ""): value
            for param, value in zip(potential_hyperdrive_args[::2], potential_hyperdrive_args[1::2])
        }
    elif len(potential_hyperdrive_args) > 0:
        # Reparse to throw standard error message:
        args = parser.parse_args()

    # Make TF less noisy:
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "1"
    tf.get_logger().setLevel("ERROR")

    run_and_debug(
        lambda: run_train_from_args(args, hyperdrive_hyperparameter_overrides), args.debug
    )

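# The override parsing above hinges on argparse.parse_known_args() handing back
# the tokens it did not recognise. Below is a minimal, self-contained sketch of
# that pattern; the argument names are hypothetical and only illustrate the
# mechanism, they are not this project's real CLI.
import argparse

if __name__ == "__main__":
    sketch_parser = argparse.ArgumentParser()
    sketch_parser.add_argument("--max-epochs", type=int, default=100)

    # Leftover tokens are paired up as "--key value" (or "key value") and turned
    # into a string-valued override dict, as in run() above.
    known, leftover = sketch_parser.parse_known_args(
        ["--max-epochs", "5", "--learning-rate", "0.01", "hidden_dim", "64"]
    )
    overrides = {
        key.replace("--", ""): value
        for key, value in zip(leftover[::2], leftover[1::2])
    }
    print(known.max_epochs)  # 5
    print(overrides)         # {'learning-rate': '0.01', 'hidden_dim': '64'}
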
        optimizer_creator=create_optimizer_,
        clip_gradient_norm=1,
        target_validation_metric="Accuracy",
        target_validation_metric_higher_is_better=True,
        enable_amp=arguments["--amp"],
    )
    if nn is not None:
        trainer.neural_module = nn

    world_size = int(arguments["--world-size"])
    if world_size == -1:
        world_size = torch.cuda.device_count()

    trainer.distributed_train(
        world_size,
        training_data,
        validation_data,
        initialize_metadata=initialize_metadata,
        parallelize=not arguments["--sequential-run"],
        validate_on_start=True,
        shuffle_training_data=True,
        patience=10,
        worker_init=worker_init,
        start_epoch_idx=current_epoch_idx,
    )


if __name__ == "__main__":
    args = docopt(__doc__)
    run_and_debug(lambda: run(args), args.get("--debug", False))

    --quiet                    Show less output.
    --debug                    Turn on debugger.
"""
from typing import Optional

from docopt import docopt
from dpu_utils.utils import run_and_debug, RichPath

from utils.model_utils import restore


def test(model_path: str, test_data_path: Optional[RichPath], result_dir: str, quiet: bool = False):
    model = restore(model_path, result_dir)
    test_data_path = test_data_path or RichPath.create(model.task.default_data_path())
    model.test(test_data_path)


def run(args):
    azure_info_path = args.get('--azure-info', None)
    model_path = args['STORED_MODEL_PATH']
    test_data_path = args.get('DATA_PATH')
    if test_data_path is not None:
        test_data_path = RichPath.create(test_data_path, azure_info_path)
    result_dir = args.get('--result_dir', 'trained_models')
    test(model_path, test_data_path, result_dir, quiet=args.get('--quiet'))


if __name__ == "__main__":
    args = docopt(__doc__)
    run_and_debug(lambda: run(args), enable_debugging=args['--debug'])

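# run_and_debug (imported from dpu_utils.utils above) is the wrapper used by all
# of these entry points. Roughly, it calls the given thunk and, when debugging is
# enabled, drops into a post-mortem debugger rather than just crashing. The
# function below is an illustrative stand-in written under that assumption; it
# is not the dpu_utils implementation itself.
import pdb
import traceback
from typing import Callable


def run_and_debug_sketch(func: Callable[[], None], enable_debugging: bool) -> None:
    try:
        func()
    except Exception:
        if enable_debugging:
            traceback.print_exc()
            pdb.post_mortem()  # inspect the frame where the exception was raised
        else:
            raise
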
def run(arguments) -> None:
    print("Loading data ...")
    model = LanguageModel.restore(arguments["TRAINED_MODEL"])
    print(f" Loaded trained model from {arguments['TRAINED_MODEL']}.")
    test_data = load_data_from_dir(
        model.vocab,
        length=model.hyperparameters["max_seq_length"],
        data_dir=arguments["TEST_DATA_DIR"],
        max_num_files=arguments.get("--max-num-files"),
    )
    print(
        f" Loaded {test_data.shape[0]} test samples from {arguments['TEST_DATA_DIR']}."
    )
    test_loss, test_acc = model.run_one_epoch(
        get_minibatch_iterator(
            test_data,
            model.hyperparameters["batch_size"],
            is_training=False,
            drop_remainder=False,
        ),
        training=False,
    )
    print(f"Test: Loss {test_loss:.4f}, Acc {test_acc:.3f}")


if __name__ == "__main__":
    args = docopt(__doc__)
    run_and_debug(lambda: run(args), args["--debug"])

sys.path.append(os.path.join(os.path.dirname(__file__), ".."))


def run_indexing(model_path: RichPath, index_data_path: RichPath):
    test_hyper_overrides = {
        'run_id': 'indexing',
        "dropout_keep_rate": 1.0,
    }

    data_chunks = index_data_path.get_filtered_files_in_dir('*.jsonl.gz')

    # Restore model
    model = model_restore_helper.restore(model_path,
                                         is_train=False,
                                         hyper_overrides=test_hyper_overrides)

    model.create_index(data_chunks)
    model.save(model_path)


def run(arguments):
    azure_info_path = arguments.get('--azure-info', None)
    data_folder = RichPath.create(arguments['DATA_PATH'], azure_info_path)
    model_path = RichPath.create(arguments['MODEL_PATH'])
    run_indexing(model_path, data_folder)


if __name__ == '__main__':
    args = docopt(__doc__)
    run_and_debug(lambda: run(args), args.get('--debug', False))

            save_folder,
            hyperparameters,
            azure_info_path,
            run_name,
            arguments['--quiet'],
            max_files_per_dir=max_files_per_dir,
            parallelize=not (arguments['--sequential']),
            random_sample_size=int(args['--random-sample-size']),
            random_data_dir_name=random_data_dir)

        if num_random_samples == 1:
            wandb.config['best_model_path'] = str(model_path)
            wandb.save(str(model_path.to_local_path()))

        if no_eval:
            continue
        # only limit files in test run if `--testrun` flag is passed by user.
        elif testrun:
            compute_evaluation_metrics(model_path, arguments, azure_info_path,
                                       valid_data_dirs, test_data_dirs, max_files_per_dir)
        else:
            compute_evaluation_metrics(model_path, arguments, azure_info_path,
                                       valid_data_dirs, test_data_dirs)


if __name__ == '__main__':
    args = docopt(__doc__)
    run_and_debug(lambda: run(args), args['--debug'])

#!/usr/bin/env python
"""
Usage:
    trainandtest.py [options] TRAIN_DATA_PATH VALID_DATA_PATH TEST_DATA_PATH MODEL_FILENAME

Options:
    --aml                      Run this in Azure ML
    --amp                      Enable automatic mixed precision.
    --azure-info=<path>        Azure authentication information file (JSON). Used to load data from Azure storage.
    --max-num-epochs=<epochs>  The maximum number of epochs to run training for. [default: 100]
    --minibatch-size=<size>    The minibatch size. [default: 300]
    --restore-path=<path>      The path to previous model file for starting from previous checkpoint.
    --sequential-run           Do not parallelize data loading. Makes debugging easier.
    --quiet                    Do not show progress bar.
    -h --help                  Show this screen.
    --debug                    Enable debug routines. [default: False]
"""
from docopt import docopt
from dpu_utils.utils import run_and_debug

from ptgnn.implementations.graph2seq import test, train

if __name__ == "__main__":
    args = docopt(__doc__)
    run_and_debug(lambda: train.run(args), args.get("--debug", False))
    run_and_debug(lambda: test.run(args), args.get("--debug", False))

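# With docopt, the Usage/Options docstring above *is* the argument parser. The
# toy script below mirrors that pattern; its usage string and option names are
# made up for illustration and are not trainandtest.py's real interface.
"""
Usage:
    demo.py [options] MODEL_FILENAME

Options:
    --max-num-epochs=<epochs>  The maximum number of epochs to run training for. [default: 100]
    --debug                    Enable debug routines. [default: False]
"""
from docopt import docopt

if __name__ == "__main__":
    args = docopt(__doc__)
    # docopt returns a plain dict keyed by option/argument names, e.g.
    # {'MODEL_FILENAME': 'model.pkl.gz', '--max-num-epochs': '100', '--debug': False}
    print(args["MODEL_FILENAME"], args["--max-num-epochs"], args["--debug"])
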
        )
        f.write('}\n')  # graph

# endregion


def test_on_self():
    from glob import iglob
    import os

    lattice = TypeLatticeGenerator('../../metadata/typingRules.json')
    for fname in iglob('./testfiles/**/*.py', recursive=True):
        # for fname in iglob('/mnt/c/Users/t-mialla/Source/Repos/**/*.py', recursive=True):
        if os.path.isdir(fname):
            continue
        print(fname)
        with open(fname) as f:
            try:
                b = AstGraphGenerator(f.read(), lattice)
                b.build()
                b.to_dot('test.dot')  # , draw_only_edge_types={EdgeType.NEXT_USE, EdgeType.OCCURRENCE_OF})
                import pdb
                pdb.set_trace()
            except SyntaxError:
                pass


if __name__ == '__main__':
    run_and_debug(test_on_self, True)

print(f" Loaded {valid_data[0].shape[0]} validation samples.") valid_data_iterator = get_minibatch_iterator( valid_data, batch_size=10, is_training=False, drop_remainder=True ) aux = next(valid_data_iterator) good_predictions, bad_predictions, logits, targets = model.compute_loss_and_acc( model.compute_logits(tf.stack([*aux], axis=2), training=False), target_token_seq=aux[1], qualitative_results=True) good_predictions_counter = Counter(good_predictions.numpy()) bad_predictions_counter = Counter(bad_predictions.numpy()) print(f"GOOD predictions of model {args['--model']}") for node_id, count in good_predictions_counter.most_common(15): print("%5d | %15s" % (count, vocab_actions.get_name_for_id(node_id))) print(f"\nBAD predictions of model {args['--model']}") for node_id, count in bad_predictions_counter.most_common(15): print("%5d | %15s" % (count, vocab_actions.get_name_for_id(node_id))) if __name__ == "__main__": args = docopt(__doc__) run_and_debug(lambda: evaluate(args), args["--debug"])
        number_variables += len(data['supernodes'])
        number_annotations += sum(
            1 for supernode in data['supernodes'].values()
            if supernode['annotation'] not in {None, 'None', 'Nothing', 'Any'})
        annotation_table.update(
            (supernode['annotation'] for supernode in data['supernodes'].values()
             if supernode['annotation'] not in {None, 'None', 'Nothing', 'Any'}))

    with open(output_path.to_local_path().path, "a") as f:
        f.write("Statistics for file: " + graph_path.to_local_path().path + "\n")
        f.write("Number of graphs: %d\n" % (number_graphs))
        f.write("Number of variables: %d\n" % (number_variables))
        f.write("Number of annotations: %d\n" % (number_annotations))
        f.write("Number of different annotations: %d\n" % (len(list(annotation_table))))
        f.write("\nFrequency distribution of annotations type:\n\n")
        for annotation, value in annotation_table.most_common():
            f.write("%s\t%d\n" % (annotation, value))


def run(arguments):
    graph_path = RichPath.create(arguments['GRAPH_PATH'])
    output_path = RichPath.create(arguments['OUTPUT_PATH'])
    run_stats(graph_path, output_path)


if __name__ == "__main__":
    args = docopt(__doc__)
    run_and_debug(lambda: run(args), True)