Example #1
0
def test_Run_with_mock_module(
    disposable_log_db: log_database.Database,
    graph_db: graph_tuple_database.Database,
    epoch_count: int,
    k_fold: bool,
    run_with_memory_profiler: bool,
):
    """Test the run.Run() method using a mock model.

    Args:
      disposable_log_db: A throwaway log database that the run writes into.
      graph_db: The graph tuple database to train/test on.
      epoch_count: The number of epochs to run for.
      k_fold: Whether to perform k-fold cross-validation.
      run_with_memory_profiler: Whether to enable the memory profiler.
    """
    log_db = disposable_log_db

    # Set the flags that determine the behaviour of Run().
    FLAGS.graph_db = flags_parsers.DatabaseFlag(graph_tuple_database.Database,
                                                graph_db.url,
                                                must_exist=True)
    FLAGS.log_db = flags_parsers.DatabaseFlag(log_database.Database,
                                              log_db.url,
                                              must_exist=True)
    FLAGS.epoch_count = epoch_count
    FLAGS.k_fold = k_fold
    FLAGS.run_with_memory_profiler = run_with_memory_profiler

    run.Run(MockModel)

    # Test that k-fold produces one run per split, and a single run otherwise.
    # NOTE: the expected value must be parenthesized. The unparenthesized
    # original parsed as `assert (run_count == split_count) if k_fold else 1`,
    # which evaluates to the always-truthy constant 1 when k_fold is False,
    # so the non-k-fold case was never actually checked.
    assert log_db.run_count == (graph_db.split_count if k_fold else 1)
Example #2
0
def test_Run_test_only(
  disposable_log_db: log_database.Database,
  graph_db: graph_tuple_database.Database,
  k_fold: bool,
):
  """Test the run.Run() method with --test_only.

  Args:
    disposable_log_db: A throwaway log database that the run writes into.
    graph_db: The graph tuple database to test on.
    k_fold: Whether to perform k-fold cross-validation.
  """
  log_db = disposable_log_db

  # Set the flags that determine the behaviour of Run().
  FLAGS.graph_db = flags_parsers.DatabaseFlag(
    graph_tuple_database.Database, graph_db.url, must_exist=True
  )
  FLAGS.log_db = flags_parsers.DatabaseFlag(
    log_database.Database, log_db.url, must_exist=True
  )
  FLAGS.test_only = True
  FLAGS.k_fold = k_fold

  run.Run(MockModel)

  # Test that k-fold produces one run per split, and a single run otherwise.
  # NOTE: the expected value must be parenthesized. The unparenthesized
  # original parsed as `assert (run_count == split_count) if k_fold else 1`,
  # which evaluates to the always-truthy constant 1 when k_fold is False,
  # so the non-k-fold case was never actually checked.
  assert log_db.run_count == (graph_db.split_count if k_fold else 1)

  run_ids = log_db.run_ids
  for run_id in run_ids:
    logs = log_analysis.RunLogAnalyzer(log_db=log_db, run_id=run_id)
    epochs = logs.tables["epochs"]

    # Check that we performed as many epochs as expected: test-only mode
    # produces exactly one epoch.
    assert 1 == len(epochs)
    test_count = len(epochs[epochs["test_accuracy"].notnull()])
    # Check that we produced a test result.
    assert test_count == 1
Example #3
0
def Main():
    """Main entry point.

    Trains and evaluates every model named in --model on every dataset named
    in --dataset, using databases derived from --db_stem.
    """
    db_stem = FLAGS.db_stem
    tag_suffix = FLAGS.tag_suffix

    # Flags that are identical for every model and dataset.
    FLAGS.log_db = flags_parsers.DatabaseFlag(
        log_database.Database, f"{db_stem}_devmap_logs", must_exist=True
    )
    FLAGS.ir_db = flags_parsers.DatabaseFlag(
        ir_database.Database, f"{db_stem}_ir", must_exist=True
    )
    FLAGS.k_fold = True
    FLAGS.test_on = "improvement_and_last"

    # (sequence encoder, padded sequence length) for each LSTM variant.
    lstm_variants = {
        "lstm_opencl": (lstm.Ir2SeqType.OPENCL, 1024),
        "lstm_ir": (lstm.Ir2SeqType.LLVM, 15000),
        "lstm_inst2vec": (lstm.Ir2SeqType.INST2VEC, 15000),
    }

    for dataset in FLAGS.dataset:
        # Flags shared by every model on this dataset.
        FLAGS.graph_db = flags_parsers.DatabaseFlag(
            graph_tuple_database.Database,
            f"{db_stem}_devmap_{dataset}",
            must_exist=True,
        )

        for model in FLAGS.model:
            FLAGS.tag = f"devmap_{dataset}_{model}_{tag_suffix}"

            if model == "zero_r":
                FLAGS.epoch_count = 1
                run.Run(zero_r.ZeroR)
            elif model in lstm_variants:
                ir2seq_type, sequence_length = lstm_variants[model]
                FLAGS.epoch_count = 50
                FLAGS.ir2seq = flags_parsers.EnumFlag(
                    lstm.Ir2SeqType, ir2seq_type
                )
                FLAGS.padded_sequence_length = sequence_length
                FLAGS.batch_size = 64
                run.Run(lstm.GraphLstm)
            elif model == "ggnn":
                # Reduced batch size because OOM errors with larger batches on
                # my NVIDIA GTX 1080 GPU.
                FLAGS.graph_batch_size = 32
                FLAGS.epoch_count = 100
                run.Run(ggnn.Ggnn)
            else:
                raise app.UsageError(f"Unknown model: {model}")
Example #4
0
def test_Run(
  disposable_log_db: log_database.Database,
  graph_db: graph_tuple_database.Database,
  k_fold: bool,
  run_with_memory_profiler: bool,
  test_on: str,
  stop_at: List[str],
):
  """Test the run.Run() method.

  Args:
    disposable_log_db: A throwaway log database that the run writes into.
    graph_db: The graph tuple database to train/test on.
    k_fold: Whether to perform k-fold cross-validation.
    run_with_memory_profiler: Whether to enable the memory profiler.
    test_on: The --test_on policy: one of "none", "best", "improvement", or
      "improvement_and_last".
    stop_at: The list of --stop_at early-stopping conditions.
  """
  log_db = disposable_log_db

  # Set the flags that determine the behaviour of Run().
  FLAGS.graph_db = flags_parsers.DatabaseFlag(
    graph_tuple_database.Database, graph_db.url, must_exist=True
  )
  FLAGS.log_db = flags_parsers.DatabaseFlag(
    log_database.Database, log_db.url, must_exist=True
  )
  FLAGS.epoch_count = 3
  FLAGS.k_fold = k_fold
  FLAGS.run_with_memory_profiler = run_with_memory_profiler
  FLAGS.test_on = test_on
  FLAGS.stop_at = stop_at

  run.Run(MockModel)

  # Test that k-fold produces one run per split, and a single run otherwise.
  # NOTE: the expected value must be parenthesized. The unparenthesized
  # original parsed as `assert (run_count == split_count) if k_fold else 1`,
  # which evaluates to the always-truthy constant 1 when k_fold is False,
  # so the non-k-fold case was never actually checked.
  assert log_db.run_count == (graph_db.split_count if k_fold else 1)

  run_ids = log_db.run_ids
  for run_id in run_ids:
    logs = log_analysis.RunLogAnalyzer(log_db=log_db, run_id=run_id)
    epochs = logs.tables["epochs"]

    # Check that we performed as many epochs as expected. We can't check the
    # exact value because of --stop_at options.
    assert 1 <= len(epochs) <= FLAGS.epoch_count

    test_count = len(epochs[epochs["test_accuracy"].notnull()])

    # Test that the number of test epochs matches the expected amount depending
    # on --test_on flag.
    if test_on == "none":
      assert test_count == 0
    elif test_on == "best":
      assert test_count == 1
    elif test_on == "improvement":
      assert test_count >= 1
    elif test_on == "improvement_and_last":
      assert test_count >= 1
Example #5
0
def Main():
  """Main entry point.

  Trains and evaluates every model named in --model on every dataflow dataset
  named in --dataset, using databases derived from --db_stem.
  """
  db_stem = FLAGS.db_stem
  tag_suffix = FLAGS.tag_suffix

  # Flags that are identical for every model and dataset.
  FLAGS.log_db = flags_parsers.DatabaseFlag(
    log_database.Database,
    f"{db_stem}_dataflow_logs",
    must_exist=False,
  )
  FLAGS.ir_db = flags_parsers.DatabaseFlag(
    ir_database.Database, f"{db_stem}_ir", must_exist=True
  )
  FLAGS.test_on = "improvement_and_last"
  FLAGS.max_train_per_epoch = 5000
  FLAGS.max_val_per_epoch = 1000

  # Sequence encoder for each of the LSTM variants.
  lstm_variants = {
    "lstm_ir": lstm.Ir2SeqType.LLVM,
    "lstm_inst2vec": lstm.Ir2SeqType.INST2VEC,
  }

  for dataset in FLAGS.dataset:
    graph_db = graph_tuple_database.Database(
      f"{db_stem}_{dataset}", must_exist=True
    )
    FLAGS.graph_db = flags_parsers.DatabaseFlag(
      graph_tuple_database.Database, graph_db.url, must_exist=True,
    )

    # Pick how per-batch prec/rec/f1 scores are averaged from the node label
    # dimensionality: alias_sets uses 3-D node labels ("weighted"), while the
    # binary node classification tasks use 2-D labels ("binary").
    dimensionality = graph_db.node_y_dimensionality
    if dimensionality == 3:
      FLAGS.batch_scores_averaging_method = "weighted"
    elif dimensionality == 2:
      FLAGS.batch_scores_averaging_method = "binary"
    else:
      raise ValueError(
        f"Unknown node dimensionality: {graph_db.node_y_dimensionality}"
      )

    # Liveness is identifier-based; all other tasks are statement-based.
    node_encoder = (
      lstm.NodeEncoder.IDENTIFIER
      if dataset == "liveness"
      else lstm.NodeEncoder.STATEMENT
    )
    FLAGS.nodes = flags_parsers.EnumFlag(lstm.NodeEncoder, node_encoder)

    for model in FLAGS.model:
      FLAGS.tag = f"{dataset}_{model}_{tag_suffix}"

      if model == "zero_r":
        FLAGS.epoch_count = 1
        FLAGS.graph_reader_order = "in_order"
        run.Run(zero_r.ZeroR)
      elif model in lstm_variants:
        FLAGS.epoch_count = 50
        FLAGS.ir2seq = flags_parsers.EnumFlag(
          lstm.Ir2SeqType, lstm_variants[model]
        )
        FLAGS.graph_reader_order = "batch_random"
        FLAGS.padded_sequence_length = 15000
        FLAGS.batch_size = 64
        run.Run(lstm.GraphLstm)
      elif model == "ggnn":
        FLAGS.layer_timesteps = ["30"]
        FLAGS.graph_batch_node_count = 15000
        FLAGS.graph_reader_order = "global_random"
        FLAGS.epoch_count = 300
        run.Run(ggnn.Ggnn)
      else:
        raise app.UsageError(f"Unknown model: {model}")