def Main():
  """Run the k-fold devmap experiment for every requested dataset/model pair.

  Reads the sweep configuration from flags (--db_stem, --model, --dataset,
  --tag_suffix), then configures the global FLAGS for each combination and
  hands off to run.Run() with the matching model class.
  """
  db_stem = FLAGS.db_stem
  models = FLAGS.model
  tag_suffix = FLAGS.tag_suffix
  datasets = FLAGS.dataset

  # Flags that are identical for every dataset and model in the sweep.
  FLAGS.log_db = flags_parsers.DatabaseFlag(
    log_database.Database, f"{db_stem}_devmap_logs", must_exist=True
  )
  FLAGS.ir_db = flags_parsers.DatabaseFlag(
    ir_database.Database, f"{db_stem}_ir", must_exist=True
  )
  FLAGS.k_fold = True
  FLAGS.test_on = "improvement_and_last"

  # The three LSTM variants differ only in sequence encoder and padded length.
  lstm_variants = {
    "lstm_opencl": (lstm.Ir2SeqType.OPENCL, 1024),
    "lstm_ir": (lstm.Ir2SeqType.LLVM, 15000),
    "lstm_inst2vec": (lstm.Ir2SeqType.INST2VEC, 15000),
  }

  for dataset in datasets:
    # Flags shared by every model on this dataset.
    FLAGS.graph_db = flags_parsers.DatabaseFlag(
      graph_tuple_database.Database,
      f"{db_stem}_devmap_{dataset}",
      must_exist=True,
    )
    for model in models:
      FLAGS.tag = f"devmap_{dataset}_{model}_{tag_suffix}"
      if model == "zero_r":
        FLAGS.epoch_count = 1
        run.Run(zero_r.ZeroR)
      elif model in lstm_variants:
        ir2seq_type, padded_length = lstm_variants[model]
        FLAGS.epoch_count = 50
        FLAGS.ir2seq = flags_parsers.EnumFlag(lstm.Ir2SeqType, ir2seq_type)
        FLAGS.padded_sequence_length = padded_length
        FLAGS.batch_size = 64
        run.Run(lstm.GraphLstm)
      elif model == "ggnn":
        # Reduced batch size because OOM errors with larger batches on my
        # NVIDIA GTX 1080 GPU.
        FLAGS.graph_batch_size = 32
        FLAGS.epoch_count = 100
        run.Run(ggnn.Ggnn)
      else:
        raise app.UsageError(f"Unknown model: {model}")
def test_Run_with_mock_module(
  disposable_log_db: log_database.Database,
  graph_db: graph_tuple_database.Database,
  epoch_count: int,
  k_fold: bool,
  run_with_memory_profiler: bool,
):
  """Test the run.Run() method with a mock model class."""
  log_db = disposable_log_db
  # Set the flags that determine the behaviour of Run().
  FLAGS.graph_db = flags_parsers.DatabaseFlag(
    graph_tuple_database.Database, graph_db.url, must_exist=True
  )
  FLAGS.log_db = flags_parsers.DatabaseFlag(
    log_database.Database, log_db.url, must_exist=True
  )
  FLAGS.epoch_count = epoch_count
  FLAGS.k_fold = k_fold
  FLAGS.run_with_memory_profiler = run_with_memory_profiler

  run.Run(MockModel)

  # Test that k-fold produces multiple runs. The parentheses are required:
  # without them, `a == b if c else 1` parses as `(a == b) if c else 1`,
  # which evaluates to the always-truthy `1` when k_fold is False, so the
  # assertion could never fail in the non-k-fold case.
  assert log_db.run_count == (graph_db.split_count if k_fold else 1)
def test_Run_test_only(
  disposable_log_db: log_database.Database,
  graph_db: graph_tuple_database.Database,
  k_fold: bool,
):
  """Test the run.Run() method with --test_only set."""
  log_db = disposable_log_db
  # Set the flags that determine the behaviour of Run().
  FLAGS.graph_db = flags_parsers.DatabaseFlag(
    graph_tuple_database.Database, graph_db.url, must_exist=True
  )
  FLAGS.log_db = flags_parsers.DatabaseFlag(
    log_database.Database, log_db.url, must_exist=True
  )
  FLAGS.test_only = True
  FLAGS.k_fold = k_fold

  run.Run(MockModel)

  # Test that k-fold produces multiple runs. The parentheses are required:
  # without them, `a == b if c else 1` parses as `(a == b) if c else 1`,
  # which evaluates to the always-truthy `1` when k_fold is False, so the
  # assertion could never fail in the non-k-fold case.
  assert log_db.run_count == (graph_db.split_count if k_fold else 1)

  run_ids = log_db.run_ids
  for run_id in run_ids:
    logs = log_analysis.RunLogAnalyzer(log_db=log_db, run_id=run_id)
    epochs = logs.tables["epochs"]
    # Check that we performed as many epochs as expected.
    assert 1 == len(epochs)
    test_count = len(epochs[epochs["test_accuracy"].notnull()])
    # Check that we produced a test result.
    assert test_count == 1
def main():
  """Main entry point.

  Selects the LSTM variant from the graph database's label granularity:
  node-level labels train a NodeLstm, otherwise a whole-graph GraphLstm.
  """
  graph_db: graph_tuple_database.Database = FLAGS.graph_db()
  model_class = (
    node_lstm.NodeLstm
    if graph_db.node_y_dimensionality
    else graph_lstm.GraphLstm
  )
  run.Run(model_class, graph_db)
def test_Run(
  disposable_log_db: log_database.Database,
  graph_db: graph_tuple_database.Database,
  k_fold: bool,
  run_with_memory_profiler: bool,
  test_on: str,
  stop_at: List[str],
):
  """Test the run.Run() method across --test_on / --stop_at combinations."""
  log_db = disposable_log_db
  # Set the flags that determine the behaviour of Run().
  FLAGS.graph_db = flags_parsers.DatabaseFlag(
    graph_tuple_database.Database, graph_db.url, must_exist=True
  )
  FLAGS.log_db = flags_parsers.DatabaseFlag(
    log_database.Database, log_db.url, must_exist=True
  )
  FLAGS.epoch_count = 3
  FLAGS.k_fold = k_fold
  FLAGS.run_with_memory_profiler = run_with_memory_profiler
  FLAGS.test_on = test_on
  FLAGS.stop_at = stop_at

  run.Run(MockModel)

  # Test that k-fold produces multiple runs. The parentheses are required:
  # without them, `a == b if c else 1` parses as `(a == b) if c else 1`,
  # which evaluates to the always-truthy `1` when k_fold is False, so the
  # assertion could never fail in the non-k-fold case.
  assert log_db.run_count == (graph_db.split_count if k_fold else 1)

  run_ids = log_db.run_ids
  for run_id in run_ids:
    logs = log_analysis.RunLogAnalyzer(log_db=log_db, run_id=run_id)
    epochs = logs.tables["epochs"]
    # Check that we performed as many epochs as expected. We can't check the
    # exact value because of --stop_at options.
    assert 1 <= len(epochs) <= FLAGS.epoch_count
    test_count = len(epochs[epochs["test_accuracy"].notnull()])
    # Test that the number of test epochs matches the expected amount depending
    # on --test_on flag.
    if test_on == "none":
      assert test_count == 0
    elif test_on == "best":
      assert test_count == 1
    elif test_on == "improvement":
      assert test_count >= 1
    elif test_on == "improvement_and_last":
      assert test_count >= 1
def main():
  """Main entry point: hand the Ggnn model class to the run harness."""
  run.Run(Ggnn)
def Main():
  """Run the dataflow experiments for every requested dataset/model pair.

  Reads the sweep configuration from flags (--db_stem, --model, --dataset,
  --tag_suffix), then configures the global FLAGS for each combination and
  hands off to run.Run() with the matching model class.
  """
  db_stem = FLAGS.db_stem
  models = FLAGS.model
  tag_suffix = FLAGS.tag_suffix
  datasets = FLAGS.dataset

  # Flags that are identical for every dataset and model in the sweep.
  # NOTE(review): unlike the IR database, the log database is permitted to
  # not exist yet (must_exist=False) — presumably it is created on demand.
  FLAGS.log_db = flags_parsers.DatabaseFlag(
    log_database.Database, f"{db_stem}_dataflow_logs", must_exist=False
  )
  FLAGS.ir_db = flags_parsers.DatabaseFlag(
    ir_database.Database, f"{db_stem}_ir", must_exist=True
  )
  FLAGS.test_on = "improvement_and_last"
  FLAGS.max_train_per_epoch = 5000
  FLAGS.max_val_per_epoch = 1000

  for dataset in datasets:
    graph_db = graph_tuple_database.Database(
      f"{db_stem}_{dataset}", must_exist=True
    )
    FLAGS.graph_db = flags_parsers.DatabaseFlag(
      graph_tuple_database.Database,
      graph_db.url,
      must_exist=True,
    )

    # Use binary prec/rec/f1 scores for binary node classification tasks;
    # alias_sets uses 3-D node labels and gets weighted averaging instead.
    dimensionality = graph_db.node_y_dimensionality
    if dimensionality == 3:
      FLAGS.batch_scores_averaging_method = "weighted"
    elif dimensionality == 2:
      FLAGS.batch_scores_averaging_method = "binary"
    else:
      raise ValueError(f"Unknown node dimensionality: {dimensionality}")

    # Liveness is identifier-based, all others are statement-based.
    encoder = (
      lstm.NodeEncoder.IDENTIFIER
      if dataset == "liveness"
      else lstm.NodeEncoder.STATEMENT
    )
    FLAGS.nodes = flags_parsers.EnumFlag(lstm.NodeEncoder, encoder)

    for model in models:
      FLAGS.tag = f"{dataset}_{model}_{tag_suffix}"
      if model == "zero_r":
        FLAGS.epoch_count = 1
        FLAGS.graph_reader_order = "in_order"
        run.Run(zero_r.ZeroR)
      elif model in ("lstm_ir", "lstm_inst2vec"):
        # The two LSTM variants differ only in their sequence encoder.
        FLAGS.epoch_count = 50
        FLAGS.ir2seq = flags_parsers.EnumFlag(
          lstm.Ir2SeqType,
          lstm.Ir2SeqType.LLVM
          if model == "lstm_ir"
          else lstm.Ir2SeqType.INST2VEC,
        )
        FLAGS.graph_reader_order = "batch_random"
        FLAGS.padded_sequence_length = 15000
        FLAGS.batch_size = 64
        run.Run(lstm.GraphLstm)
      elif model == "ggnn":
        FLAGS.layer_timesteps = ["30"]
        FLAGS.graph_batch_node_count = 15000
        FLAGS.graph_reader_order = "global_random"
        FLAGS.epoch_count = 300
        run.Run(ggnn.Ggnn)
      else:
        raise app.UsageError(f"Unknown model: {model}")
def main():
  """Main entry point: hand the ZeroR baseline model class to the run harness."""
  run.Run(ZeroR)