예제 #1
0
def test_GetAliasSetsByFunction_no_alias_sets():
    """SIMPLE_C_BYTECODE yields two functions, each with no alias sets."""
    expected_functions = ("DoSomething", "main")
    result = opt_util.GetAliasSetsByFunction(SIMPLE_C_BYTECODE)

    # Both functions must be reported, and nothing else.
    for function_name in expected_functions:
        assert function_name in result
    assert len(result) == 2

    # Each reported function maps to an empty list of alias sets.
    for function_name in expected_functions:
        assert result[function_name] == []
예제 #2
0
def MakeAliasSetGraphs(
  g: nx.MultiDiGraph,
  bytecode: str,
  n: typing.Optional[int] = None,
  false=False,
  true=True,
) -> typing.Iterable[nx.MultiDiGraph]:
  """Produce up to `n` alias set graphs.

  NOTE(review): this listing of the function stops after the alias sets have
  been pruned. The code shown here does not label or yield any graph, and does
  not read `n`; the remaining steps are not part of this excerpt.

  Args:
    g: The unlabelled input graph.
    bytecode: The bytecode which produced the input graph.
    n: The maximum number of graphs to produce. Not consumed by the code
      visible in this excerpt.
    false: TODO(github.com/ChrisCummins/ProGraML/issues/2): Unused, discarded
      immediately.
    true: TODO(github.com/ChrisCummins/ProGraML/issues/2): Unused, discarded
      immediately.

  Returns:
    Intended to be a generator of annotated graphs; the excerpt shown here
    ends before anything is yielded.
  """
  # TODO(github.com/ChrisCummins/ProGraML/issues/2): Replace true/false args
  # with a list of class values for all graph annotator functions.
  del false
  del true

  # Alias sets for the bytecode, keyed by function name.
  alias_sets_by_function = opt_util.GetAliasSetsByFunction(bytecode)

  # Gather the function names that appear on graph nodes. Nodes without a
  # 'function' attribute (e.g. the magic root node) are skipped.
  functions = set()
  for _, function_name in g.nodes(data="function"):
    if function_name:
      functions.add(function_name)

  # Silently drop alias sets for functions which don't exist in the graph. The
  # names are collected first so the dictionary is not resized mid-iteration.
  alias_sets_to_delete = [
    name for name in alias_sets_by_function if name not in functions
  ]
  for name in alias_sets_to_delete:
    del alias_sets_by_function[name]
예제 #3
0
def test_GetAliasSetsByFunction_regression_test_1():
    """Regression test for bytecode which trips up pointer set size.

    The LLVM IR fixture below defines three functions
    (`__cxx_global_var_init`, `main` and `_GLOBAL__sub_I_149.txt.cpp`). The
    test only requires that GetAliasSetsByFunction() returns a result with
    exactly one entry for it.
    """
    alias_sets = opt_util.GetAliasSetsByFunction("""
; ModuleID = '/scratch/talbn/classifyapp_code/train//71/149.txt.cpp'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

%"class.std::ios_base::Init" = type { i8 }

@_ZStL8__ioinit = internal global %"class.std::ios_base::Init" zeroinitializer, align 1
@__dso_handle = external global i8
@_ZZ4mainE3yue = private unnamed_addr constant [11 x i32] [i32 31, i32 28, i32 31, i32 30, i32 31, i32 30, i32 31, i32 31, i32 30, i32 31, i32 30], align 16
@.str = private unnamed_addr constant [3 x i8] c"%d\00", align 1
@.str.1 = private unnamed_addr constant [9 x i8] c"%d %d %d\00", align 1
@.str.2 = private unnamed_addr constant [5 x i8] c"YES\0A\00", align 1
@.str.3 = private unnamed_addr constant [4 x i8] c"NO\0A\00", align 1
@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_149.txt.cpp, i8* null }]
@str = private unnamed_addr constant [3 x i8] c"NO\00"
@str.4 = private unnamed_addr constant [4 x i8] c"YES\00"

define internal fastcc void @__cxx_global_var_init() #0 section ".text.startup" {
entry:
  tail call void @_ZNSt8ios_base4InitC1Ev(%"class.std::ios_base::Init"* nonnull @_ZStL8__ioinit)
  %0 = tail call i32 @__cxa_atexit(void (i8*)* bitcast (void (%"class.std::ios_base::Init"*)* @_ZNSt8ios_base4InitD1Ev to void (i8*)*), i8* getelementptr inbounds (%"class.std::ios_base::Init", %"class.std::ios_base::Init"* @_ZStL8__ioinit, i64 0, i32 0), i8* nonnull @__dso_handle) #2
  ret void
}

declare void @_ZNSt8ios_base4InitC1Ev(%"class.std::ios_base::Init"*) #0

; Function Attrs: nounwind
declare void @_ZNSt8ios_base4InitD1Ev(%"class.std::ios_base::Init"*) #1

; Function Attrs: nounwind
declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*) #2

; Function Attrs: nounwind uwtable
define i32 @main() #3 {
entry:
  %n = alloca i32, align 4
  %nian = alloca i32, align 4
  %yue1 = alloca i32, align 4
  %yue2 = alloca i32, align 4
  %0 = bitcast i32* %n to i8*
  call void @llvm.lifetime.start(i64 4, i8* %0) #2
  %1 = bitcast i32* %nian to i8*
  call void @llvm.lifetime.start(i64 4, i8* %1) #2
  %2 = bitcast i32* %yue1 to i8*
  call void @llvm.lifetime.start(i64 4, i8* %2) #2
  %3 = bitcast i32* %yue2 to i8*
  call void @llvm.lifetime.start(i64 4, i8* %3) #2
  %call = call i32 (i8*, ...) @scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i32* nonnull %n)
  %4 = load i32, i32* %n, align 4, !tbaa !1
  %cmp.45 = icmp sgt i32 %4, 0
  br i1 %cmp.45, label %for.body, label %for.end.27

for.body:                                         ; preds = %entry, %for.inc.25
  %i.046 = phi i32 [ %inc26, %for.inc.25 ], [ 0, %entry ]
  %call1 = call i32 (i8*, ...) @scanf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.1, i64 0, i64 0), i32* nonnull %nian, i32* nonnull %yue1, i32* nonnull %yue2)
  %5 = load i32, i32* %yue1, align 4, !tbaa !1
  %6 = load i32, i32* %yue2, align 4, !tbaa !1
  %cmp2 = icmp sgt i32 %5, %6
  br i1 %cmp2, label %if.then, label %if.end

if.then:                                          ; preds = %for.body
  store i32 %5, i32* %yue2, align 4, !tbaa !1
  store i32 %6, i32* %yue1, align 4, !tbaa !1
  br label %if.end

if.end:                                           ; preds = %if.then, %for.body
  %7 = load i32, i32* %yue1, align 4, !tbaa !1
  %8 = load i32, i32* %yue2, align 4, !tbaa !1
  %cmp4.41 = icmp slt i32 %7, %8
  br i1 %cmp4.41, label %for.body.5.lr.ph, label %for.end

for.body.5.lr.ph:                                 ; preds = %if.end
  %9 = load i32, i32* %yue2, align 4, !tbaa !1
  %10 = sext i32 %7 to i64
  %11 = sext i32 %9 to i64
  br label %for.body.5

for.body.5:                                       ; preds = %for.body.5.lr.ph, %for.body.5
  %indvars.iv = phi i64 [ %10, %for.body.5.lr.ph ], [ %indvars.iv.next, %for.body.5 ]
  %sum.043 = phi i32 [ 0, %for.body.5.lr.ph ], [ %add, %for.body.5 ]
  %12 = add nsw i64 %indvars.iv, -1
  %arrayidx = getelementptr inbounds [11 x i32], [11 x i32]* @_ZZ4mainE3yue, i64 0, i64 %12
  %13 = load i32, i32* %arrayidx, align 4, !tbaa !1
  %add = add nsw i32 %13, %sum.043
  %indvars.iv.next = add nsw i64 %indvars.iv, 1
  %cmp4 = icmp slt i64 %indvars.iv.next, %11
  br i1 %cmp4, label %for.body.5, label %for.end

for.end:                                          ; preds = %for.body.5, %if.end
  %.lcssa = phi i32 [ %8, %if.end ], [ %9, %for.body.5 ]
  %sum.0.lcssa = phi i32 [ 0, %if.end ], [ %add, %for.body.5 ]
  %14 = load i32, i32* %yue1, align 4, !tbaa !1
  %cmp6 = icmp slt i32 %14, 3
  %cmp7 = icmp sgt i32 %.lcssa, 2
  %or.cond = and i1 %cmp7, %cmp6
  br i1 %or.cond, label %if.then.8, label %if.end.18

if.then.8:                                        ; preds = %for.end
  %15 = load i32, i32* %nian, align 4, !tbaa !1
  %rem38 = and i32 %15, 3
  %rem11 = srem i32 %15, 100
  %notlhs = icmp eq i32 %rem38, 0
  %notrhs = icmp ne i32 %rem11, 0
  %or.cond39.not = and i1 %notrhs, %notlhs
  %rem13 = srem i32 %15, 400
  %cmp14 = icmp eq i32 %rem13, 0
  %or.cond40 = or i1 %or.cond39.not, %cmp14
  %inc16 = zext i1 %or.cond40 to i32
  %inc16.sum.0 = add nsw i32 %inc16, %sum.0.lcssa
  br label %if.end.18

if.end.18:                                        ; preds = %if.then.8, %for.end
  %sum.1 = phi i32 [ %sum.0.lcssa, %for.end ], [ %inc16.sum.0, %if.then.8 ]
  %rem19 = srem i32 %sum.1, 7
  %cmp20 = icmp eq i32 %rem19, 0
  br i1 %cmp20, label %if.then.21, label %if.else

if.then.21:                                       ; preds = %if.end.18
  %puts37 = call i32 @puts(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @str.4, i64 0, i64 0))
  br label %for.inc.25

if.else:                                          ; preds = %if.end.18
  %puts = call i32 @puts(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @str, i64 0, i64 0))
  br label %for.inc.25

for.inc.25:                                       ; preds = %if.then.21, %if.else
  %inc26 = add nuw nsw i32 %i.046, 1
  %16 = load i32, i32* %n, align 4, !tbaa !1
  %cmp = icmp slt i32 %inc26, %16
  br i1 %cmp, label %for.body, label %for.end.27

for.end.27:                                       ; preds = %for.inc.25, %entry
  call void @llvm.lifetime.end(i64 4, i8* %3) #2
  call void @llvm.lifetime.end(i64 4, i8* %2) #2
  call void @llvm.lifetime.end(i64 4, i8* %1) #2
  call void @llvm.lifetime.end(i64 4, i8* %0) #2
  ret i32 0
}

; Function Attrs: nounwind
declare void @llvm.lifetime.start(i64, i8* nocapture) #2

; Function Attrs: nounwind
declare i32 @scanf(i8* nocapture readonly, ...) #1

; Function Attrs: nounwind
declare void @llvm.lifetime.end(i64, i8* nocapture) #2

define internal void @_GLOBAL__sub_I_149.txt.cpp() #0 section ".text.startup" {
entry:
  tail call fastcc void @__cxx_global_var_init()
  ret void
}

; Function Attrs: nounwind
declare i32 @puts(i8* nocapture readonly) #2

attributes #0 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+cmov,+cx16,+f16c,+fma,+fsgsbase,+hle,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512pf,-avx512vl,-fma4,-sha,-sse4a,-tbm,-xop" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #1 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+cmov,+cx16,+f16c,+fma,+fsgsbase,+hle,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512pf,-avx512vl,-fma4,-sha,-sse4a,-tbm,-xop" "unsafe-fp-math"="true" "use-soft-float"="false" }
attributes #2 = { nounwind }
attributes #3 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+cmov,+cx16,+f16c,+fma,+fsgsbase,+hle,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512pf,-avx512vl,-fma4,-sha,-sse4a,-tbm,-xop" "unsafe-fp-math"="true" "use-soft-float"="false" }

!llvm.ident = !{!0}

!0 = !{!"clang version 3.7.1 (tags/RELEASE_371/final)"}
!1 = !{!2, !2, i64 0}
!2 = !{!"int", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
  """)
    # Only the number of entries is checked; which function the single entry
    # belongs to, and the contents of its alias sets, are not asserted here.
    assert len(alias_sets) == 1
예제 #4
0
def test_GetAliasSetsByFunction_aliases():
    """Check the exact alias sets reported for a bytecode with one function.

    The fixture defines a single function `A`; the test pins both the set of
    reported functions and the ordered list of alias sets for `A`.
    """
    bytecode = """
%struct.foo = type { i32 }

define i32 @A() #0 {
  %1 = alloca i32, align 4
  %2 = alloca i32, align 4
  %3 = alloca [2 x i8], align 1
  %4 = alloca [10 x i8], align 1
  %5 = alloca %struct.foo, align 4
  %6 = alloca i32*, align 8
  %7 = alloca %struct.foo*, align 8
  %8 = alloca i32*, align 8
  store i32 0, i32* %2, align 4
  br label %9

; <label>:9:                                      ; preds = %24, %0
  %10 = load i32, i32* %2, align 4
  %11 = icmp ne i32 %10, 10
  br i1 %11, label %12, label %27

; <label>:12:                                     ; preds = %9
  %13 = load i32, i32* %2, align 4
  %14 = sext i32 %13 to i64
  %15 = getelementptr inbounds [10 x i8], [10 x i8]* %4, i64 0, i64 %14
  %16 = load i8, i8* %15, align 1
  %17 = getelementptr inbounds [2 x i8], [2 x i8]* %3, i64 0, i64 0
  store i8 %16, i8* %17, align 1
  %18 = load i32, i32* %2, align 4
  %19 = sub nsw i32 9, %18
  %20 = sext i32 %19 to i64
  %21 = getelementptr inbounds [10 x i8], [10 x i8]* %4, i64 0, i64 %20
  %22 = load i8, i8* %21, align 1
  %23 = getelementptr inbounds [2 x i8], [2 x i8]* %3, i64 0, i64 1
  store i8 %22, i8* %23, align 1
  br label %24

; <label>:24:                                     ; preds = %12
  %25 = load i32, i32* %2, align 4
  %26 = add nsw i32 %25, 1
  store i32 %26, i32* %2, align 4
  br label %9

; <label>:27:                                     ; preds = %9
  %28 = getelementptr inbounds %struct.foo, %struct.foo* %5, i32 0, i32 0
  store i32* %28, i32** %6, align 8
  store %struct.foo* %5, %struct.foo** %7, align 8
  store i32* null, i32** %8, align 8
  %29 = load i32, i32* %1, align 4
  ret i32 %29
}
"""
    alias_sets = opt_util.GetAliasSetsByFunction(bytecode)

    # Exactly one function is expected in the result.
    assert "A" in alias_sets
    assert len(alias_sets) == 1

    def must_alias(mod_ref, pointer_type, identifier, size):
        """Build a "must alias" set holding a single pointer."""
        return opt_util.AliasSet(
            type="must alias",
            mod_ref=mod_ref,
            pointers=[
                opt_util.Pointer(
                    type=pointer_type, identifier=identifier, size=size
                )
            ],
        )

    # The only multi-pointer set in the expected output.
    # NOTE(review): the second pointer's type is "(i8*" with a leading
    # parenthesis, unlike every other entry. It is kept exactly as found
    # because it may pin the parser's actual output - confirm against
    # opt_util before normalising it to "i8*".
    may_alias_pair = opt_util.AliasSet(
        type="may alias",
        mod_ref="Ref",
        pointers=[
            opt_util.Pointer(type="i8*", identifier="%15", size=1),
            opt_util.Pointer(type="(i8*", identifier="%21", size=1),
        ],
    )

    expected = [
        must_alias("Mod/Ref", "i32*", "%2", 4),
        may_alias_pair,
        must_alias("Mod", "i8*", "%17", 1),
        must_alias("Mod", "i8*", "%23", 1),
        must_alias("Mod", "i32**", "%6", 8),
        must_alias("Mod", "%struct.foo**", "%7", 8),
        must_alias("Mod", "i32**", "%8", 8),
        must_alias("Ref", "i32*", "%1", 4),
    ]
    assert alias_sets["A"] == expected
예제 #5
0
def MakeAliasSetGraphs(
  g: nx.MultiDiGraph,
  bytecode: str,
  n: typing.Optional[int] = None,
  false=False,
  true=True,
) -> typing.Iterable[nx.MultiDiGraph]:
  """Yield labelled copies of `g`, one per selected alias set.

  Alias sets are computed from `bytecode`, restricted to functions that appear
  in the graph, and filtered to those with at least --alias_set_min_size
  pointers. For each remaining alias set a root pointer is picked at random
  and a copy of the graph is annotated by AnnotateAliasSet().

  Args:
    g: The unlabelled input graph. It is copied for every yielded graph.
    bytecode: The bytecode which produced the input graph.
    n: The maximum number of graphs to produce. When `n` is truthy and more
      alias sets qualify than `n`, a random selection of `n` of them is used.
      If `n` is None (or otherwise falsy), every qualifying alias set produces
      a graph.
    false: TODO(github.com/ChrisCummins/ProGraML/issues/2): Unused. This method
      is hardcoded to use 3-class 1-hots.
    true: TODO(github.com/ChrisCummins/ProGraML/issues/2): Unused. This method
      is hardcoded to use 3-class 1-hots.

  Returns:
    A generator of annotated graph copies. Each yielded graph additionally
    carries a 'data_flow_max_steps_required' attribute holding the value
    returned by AnnotateAliasSet() (previously documented as the number of
    pointers in the alias set - TODO confirm against AnnotateAliasSet).

  Raises:
    ValueError: If an alias set has a type other than "may alias" or
      "must alias".
  """
  # TODO(github.com/ChrisCummins/ProGraML/issues/2): Replace true/false args
  # with a list of class values for all graph annotator functions.
  del false
  del true

  # Alias sets for the bytecode, keyed by function name.
  alias_sets_by_function = opt_util.GetAliasSetsByFunction(bytecode)

  # Gather the function names that appear on graph nodes. Nodes without a
  # 'function' attribute (e.g. the magic root node) are skipped.
  functions = set()
  for _, function_name in g.nodes(data="function"):
    if function_name:
      functions.add(function_name)

  # Silently drop alias sets for functions which don't exist in the graph. The
  # names are collected first so the dictionary is not resized mid-iteration.
  alias_sets_to_delete = [
    name for name in alias_sets_by_function if name not in functions
  ]
  for name in alias_sets_to_delete:
    del alias_sets_by_function[name]
  if alias_sets_to_delete:
    app.Log(
      2,
      "Removed %d alias sets generated from bytecode but not found in "
      "graph: %s",
      len(alias_sets_to_delete),
      alias_sets_to_delete,
    )

  # Flatten the dictionary into (function, alias set) pairs, keeping only the
  # alias sets that meet the minimum size threshold.
  function_alias_set_pairs: typing.List[
    typing.Tuple[str, opt_util.AliasSet]
  ] = []
  for function, alias_sets in alias_sets_by_function.items():
    for candidate in alias_sets:
      if len(candidate.pointers) >= FLAGS.alias_set_min_size:
        function_alias_set_pairs.append((function, candidate))

  # Cap the number of graphs at `n` by keeping a random selection.
  if n and len(function_alias_set_pairs) > n:
    random.shuffle(function_alias_set_pairs)
    function_alias_set_pairs = function_alias_set_pairs[:n]

  for function, alias_set in function_alias_set_pairs:
    # Translate the must/may alias property into 3-class 1-hot labels. The
    # "false" class is the same for both; only the "true" class differs.
    if alias_set.type not in ("may alias", "must alias"):
      raise ValueError(f"Unknown alias set type `{alias_set.type}`")
    is_may_alias = alias_set.type == "may alias"
    negative_label = np.array([1, 0, 0], np.int64)
    positive_label = np.array(
      [0, 1, 0] if is_may_alias else [0, 0, 1], np.int64
    )

    # Map pointer identifiers onto the node names produced by the
    # ComposeGraphs() method in the graph builder: composing graphs prefixes
    # the function name and appends `_operand` to identifier nodes.
    pointers = []
    for pointer in alias_set.pointers:
      pointers.append(f"{function}_{pointer.identifier}_operand")

    root_pointer = random.choice(pointers)
    labelled = g.copy()
    steps_required = AnnotateAliasSet(
      labelled, root_pointer, pointers, false=negative_label, true=positive_label
    )
    labelled.data_flow_max_steps_required = steps_required
    yield labelled