def main():
    """Command-line entry point: inject LAVA bugs and assemble a competition corpus.

    Steps visible in this function:

    1. Parse the command line and set the module-level ``project`` global
       from ``parse_vars(args.host_json, args.project)``.
    2. Pick candidate bugs with ``competition_bugs_and_non_bugs`` (from
       ``--buglist`` or ``--many``).
    3. Inject them with ``inject_bugs`` and check them with
       ``validate_bugs``; fewer validated bugs than ``--minYield`` is fatal.
    4. Unless ``--chaff`` is given, pass the validated bugs through
       ``limit_atp_reuse`` and re-inject if the list changed.
    5. Create ``<top_dir>/competition/corpora/lava-corpus-<timestamp>/``
       containing a copy of the build directory, the fuzzed inputs, an
       answer key (``ans``), ``add_bugs.sql`` and helper shell scripts.
    6. Run the build scripts and ``trigger_crashes.sh`` and report the line
       count of ``validated_bugs.txt``.

    NOTE(review): several statements in this function appear truncated in
    the current text (block headers without bodies, a call without its
    closing parenthesis, shell-script text without the enclosing string
    literal). The missing lines must be restored before this can run; the
    comments below mark each place.

    Raises:
        RuntimeError: when no bugs are specified, when the yield is below
            ``--minYield``, or when ``wc -l`` on validated_bugs.txt fails.
        AssertionError: when the bug list is empty or ``inject_bugs``
            returns ``None`` for the build.
    """
    parser = argparse.ArgumentParser(prog="competition.py", description='Inject and test LAVA bugs.')
    parser.add_argument('host_json', help = 'Host JSON file')
    parser.add_argument('project', help = 'Project name')

    # NOTE(review): --many and --minYield declare no type=, so values given on
    # the command line arrive as strings; the code below converts with int().
    parser.add_argument('-m', '--many', action="store", default=-1,
            help = 'Inject this many bugs and this many non-bugs (chosen randomly)')
    parser.add_argument('-n', '--minYield', action="store", default=-1,
            help = 'Require at least this many real bugs')
    parser.add_argument('-l', '--buglist', action="store", default=False,
            help = 'Inject this list of bugs')
    parser.add_argument('-e', '--exitCode', action="store", default=0, type=int,
            help = ('Expected exit code when program exits without crashing. Default 0'))
    #parser.add_argument('-i', '--diversify', action="store_true", default=False,
            #help = ('Diversify source code. Default false.'))
    # NOTE(review): --diversify is commented out above, yet args.diversify is
    # still read in the "diversify" section further down.
    parser.add_argument('-c', '--chaff', action="store_true", default=False, # TODO chaff and unvalidated bugs aren't always the same thing
            help = ('Leave unvalidated bugs in the binary'))
    parser.add_argument('-t', '--bugtypes', action="store", default="rel_write",
                        help = ('bug types to inject'))
    parser.add_argument('--version', action="version", version="%(prog)s {}".format(version))

    args = parser.parse_args()
    # `project` is published as a module-level global.
    global project
    project = parse_vars(args.host_json, args.project)

    dataflow = project.get("dataflow", False) # Default to false

    allowed_bugtypes = get_allowed_bugtype_num(args)

    # Set various paths
    lp = LavaPaths(project)

    # Make the bugs top_dir start with competition
    lp.bugs_top_dir = join(lp.top_dir, "competition")
    compdir = join(lp.top_dir, "competition")
    bugdir = join(compdir, "bugs")

    db = LavaDatabase(project)

    # NOTE(review): the body of this `if` is missing in the current text.
    if not os.path.exists(bugdir):

    bugs_parent = bugdir


    # Extra attributes set on the parsed-args object, which is later passed
    # to inject_bugs / validate_bugs.
    args.knobTrigger = False
    args.checkStacktrace = False
    failcount = 0

    # generate a random seed to pass through to lavaTool so it behaves deterministically between runs
    lavatoolseed = random.randint(0, 100000)

    ## First we get a list of bugs, either from cli options, or through competition_bugs_and_non_bugs

    # NOTE(review): args.buglist is the raw command-line string, so
    # len(args.buglist) is its character count, and eval() executes that
    # string as Python.
    if args.buglist:
        print ("bug_list incoming %s" % (str(args.buglist)))
        bug_list = competition_bugs_and_non_bugs(len(args.buglist), db, allowed_bugtypes, eval(args.buglist)) # XXX EVAL WHY
    # The default of -1 is truthy, so this branch is also taken when --many is
    # not supplied.
    # NOTE(review): as written, the print/raise below are part of this branch
    # and always execute after bug_list is computed; an `else:` line appears
    # to be missing.
    elif args.many:
        bug_list = competition_bugs_and_non_bugs(int(args.many), db, allowed_bugtypes, None)
        print("Fatal error: no bugs specified")
        raise RuntimeError

    assert len(bug_list) # Found no bugs

    print('bug_list (len={}):'.format(len(bug_list)))
    # NOTE(review): bug_list_str is not referenced again in the visible code.
    bug_list_str = ','.join([str(bug_id) for bug_id in bug_list])

    ## With our bug list in hand, we inject all these bugs and count how many we can trigger

    real_bug_list = []
    # add bugs to the source code and check that we can still compile
    (build, input_files, bug_solutions) = inject_bugs(bug_list, db, lp, args.host_json, \
                                      project, args, False, dataflow=dataflow, competition=True,
                                      validated=False, lavatoolseed=lavatoolseed)
    assert build is not None # build is None when injection fails. Could block here to allow for manual patches

    # Test if the injected bugs cause appropriate crashes and that our competition infrastructure parses the crashes correctly
    real_bug_list = validate_bugs(bug_list, db, lp, project, input_files, build, \
                                      args, False, competition=True, bug_solutions=bug_solutions)

    # With the default minYield of -1 this check can never fire.
    if len(real_bug_list) < int(args.minYield):
        print("\n\nXXX Yield too low after injection -- Require at least {} bugs for"
                " competition, only have {}".format(args.minYield, len(real_bug_list)))
        raise RuntimeError("Failure")

    print "\n\n Yield acceptable: {}".format(len(real_bug_list))

    # TODO- the rebuild process may invalidate a previously validated bug because the trigger will change
    # Need to find a way to pass data between lavaTool and here so we can reinject *identical* bugs as before

    ## After we have a list of validated bugs, we inject again. This time, we will only inject the bugs we have
    ## already validated, so these should all validate again. Before we reinject these, we'll remove any bugs from
    ## our list that use the same ATP as other bugs we're injecting.

    if not args.chaff:
        # re-build just with the real bugs. Inject in competition mode. Deduplicate bugs with the same ATP location
        print("Reinjecting only validated bugs")

        real_bugs = db.session.query(Bug).filter(Bug.id.in_(real_bug_list)).all()
        real_bug_list = limit_atp_reuse(real_bugs)

        # TODO retry a few times if we fail this test
        if bug_list != real_bug_list: # Only reinject if our bug list has changed
            if len(real_bug_list) < int(args.minYield):
                print("\n\nXXX Yield too low after reducing duplicates -- Require at least {} bugs for  \
                        competition, only have {}".format(args.minYield, len(real_bug_list)))
                raise RuntimeError("Failure")
            # NOTE(review): the call below is never closed in the current
            # text; its final argument line appears to be missing.
            (build, input_files, bug_solutions) = inject_bugs(real_bug_list, db, lp, args.host_json, \
                                                  project, args, False, dataflow=dataflow, competition=True, validated=True,

            assert build is not None # build is None if injection fails

    ## Now build our corpora directory with the buggy source dir, binaries in lava-install-public,
    ## lava-install-internal, and scripts to rebuild the binaries

    corpus_dir = join(compdir, "corpora")
    subprocess32.check_call(["mkdir", "-p", corpus_dir])

    # original bugs src dir
    # directory for this corpus
    # The corpus name carries a timestamp with one-second resolution.
    corpname = "lava-corpus-" + ((datetime.datetime.now()).strftime("%Y-%m-%d-%H-%M-%S"))
    corpdir = join(corpus_dir,corpname)
    # No -p here: check_call raises if mkdir fails (e.g. directory exists).
    subprocess32.check_call(["mkdir", corpdir])

    # NOTE(review): this reads lp.bugs_parent, not the local `bugs_parent`
    # assigned earlier in this function -- confirm that is intended.
    lava_bd = join(lp.bugs_parent, lp.source_root)

    # Copy lava's builddir into our local build-dir
    bd = join(corpdir, "build-dir")
    shutil.copytree(lava_bd, bd)

    # build internal version
    log_build_sh = join(corpdir, "log_build.sh")

    # We need to set the environment for the make command
    log_make = "CFLAGS=-DLAVA_LOGGING {}".format(project["make"])

    internal_builddir = join(corpdir, "lava-install-internal")
    lava_installdir = join(bd, "lava-install")
    # NOTE(review): the lines after this `with` are shell-script text and
    # .format() keyword arguments; the write call and string delimiters that
    # should surround them are missing in the current text.
    with open(log_build_sh, "w") as build:
        pushd `pwd`
        cd {bugs_build}

        # Build internal version
        rm -rf "{internal_builddir}"
        mv lava-install {internal_builddir}

            make_clean = project["clean"] if "clean" in project.keys() else "",
            configure=project['configure'] if "configure" in project.keys() else "",
            log_make = log_make,
            internal_builddir = internal_builddir,
            install = project['install'].format(install_dir=lava_installdir),
            post_install = project['post_install'] if 'post_install' in project.keys() else "",

    # diversify
    # NOTE(review): parse_args() defines no `diversify` attribute (its
    # add_argument call is commented out), so this attribute access would
    # raise AttributeError unless it is set elsewhere.
    if args.diversify:
        print('Starting diversification\n')
        compile_commands = join(bugdir, lp.source_root, "compile_commands.json")
        all_c_files = get_c_files(lp.bugs_build, compile_commands)
        # Every diversification step in this loop is commented out; it only
        # prints the file name and rebinds c_file to its full path.
        for c_file in all_c_files:
            print('diversifying {}'.format(c_file))
            c_file = join(bugdir, lp.source_root, c_file)
            # pre-processing
            #   run_cmd_notimeout(
            #           ' '.join([
            #           'gcc', '-E', '-std=gnu99',
            #           '-I.', '-I..',
            #           '-I/llvm-3.6.2/Release/lib/clang/3.6.2/include',
            #           '-o',
            #           '{}.pre'.format(c_file),
            #           c_file]))
            # diversify(c_file, '{}.div'.format(c_file))
            # run_cmd_notimeout(' '.join(['cp', '{}.div'.format(c_file), c_file]))

        # re-build
        # NOTE(review): the `for o in outp:` loop has no body, and the
        # 'build failed' print sits in the success branch (an `else:` appears
        # to be missing). The bare check_call below differs from the
        # subprocess32.check_call used on the line before it.
        (rv, outp) = run_cmd_notimeout(project['make'], cwd=lp.bugs_build)
        for o in outp:
        if rv == 0:
            print('build succeeded')
            subprocess32.check_call(project['install'], cwd=lp.bugs_build, shell=True)
            if 'post_install' in project:
                check_call(project['post_install'], cwd=lp.bugs_build, shell=True)
            print('build failed')

        # re-validate
        # Validation is re-run over the original bug_list, replacing
        # real_bug_list with the new result.
        old_yield = len(real_bug_list)
        real_bug_list = validate_bugs(bug_list, db, lp, project, input_files, build, \
                                          args, False, competition=True, bug_solutions=bug_solutions)
        new_yield = len(real_bug_list)
        print('Old yield: {}'.format(old_yield))
        print('New yield: {}'.format(new_yield))

    # Corpus directory structure: lava-corpus-[date]/
    #   inputs/
    #   src/
    #   build.sh
    #   log_build.sh
    #   lava-install-internal
    #   lava-install-prod

    # subdir with trigger inputs
    inputsdir = join(corpdir, "inputs")
    subprocess32.check_call(["mkdir", inputsdir])
    # subdir with src -- note we can't create it or copytree will fail!
    srcdir = join(corpdir, "src")
    # copy src
    shutil.copytree(bd, srcdir)

    predictions = []
    # NOTE(review): bug_ids is never appended to in the visible code, so the
    # add_bugs.sql loop below writes no insert statements.
    bug_ids = []

    # For each validated bug: copy its fuzzed input into inputs/ and record
    # (ATP source file basename, input file name, bug type) for the answer key.
    for bug in db.session.query(Bug).filter(Bug.id.in_(real_bug_list)).all():
        prediction = basename(bug.atp.loc_filename)
        fuzzed_input = fuzzed_input_for_bug(project, bug)
        (dc, fi) = os.path.split(fuzzed_input)
        shutil.copy(fuzzed_input, inputsdir)
        predictions.append((prediction, fi, bug.type))

    # The answer key is printed and also written to <corpdir>/ans, one
    # "<file> <input> <bug type name>" line per bug.
    print "Answer key:"
    with open(join(corpdir, "ans"), "w") as ans:
        for (prediction, fi, bugtype) in predictions:
            print "ANSWER  [%s] [%s] [%s]" % (prediction, fi, Bug.type_strings[bugtype])
            ans.write("%s %s %s\n" % (prediction, fi, Bug.type_strings[bugtype]))

    with open(join(corpdir, "add_bugs.sql"), "w") as f:
        f.write("/* This file will add all the generated lava_id values to the DB, you must update binary_id */\n")
        f.write("\set binary_id 0\n")
        for bug_id in bug_ids:
            f.write("insert into \"bug\" (\"lava_id\", \"binary\") VALUES (%d, :binary_id); \n" % (bug_id))

    # clean up srcdir before tar
    # NOTE(review): the statement that should introduce the indented block
    # below is missing in the current text.
        # Unconfigure
        subprocess32.check_call(["make", "distclean"])

    # Delete private files
    # NOTE(review): "lava-instal" looks like a typo for "lava-install" --
    # confirm before changing, since it decides which directory is removed.
    deldirs = [join(srcdir, x) for x in [".git", "lava-instal"]]
    delfiles = [join(srcdir, x) for x in ["compile_commands.json", "btrace.log"]]

    # NOTE(review): both `if` statements below have lost their bodies.
    for dirname in deldirs:
        if os.path.isdir(dirname):
    for fname in delfiles:
        if os.path.exists(fname):

    # build source tar
    #tarball = join(srcdir + ".tgz")
    #cmd = "/bin/tar czvf " + tarball + " src"
    #print "created corpus tarball " + tarball + "\n";

    #lp.bugs_install = join(corpdir,"lava-install") # Change to be in our corpdir

    # Save the commands we use into files so we can rerun later
    public_build_sh = join(corpdir, "public_build.sh") # Simple
    public_builddir = join(corpdir, "lava-install-public")
    lava_installdir = join(bd, "lava-install")
    # NOTE(review): as with log_build.sh, the write call and string
    # delimiters around the script text below are missing in the current
    # text; the same applies to the trigger_crashes.sh section after it.
    with open(public_build_sh, "w") as build:
        pushd `pwd`
        cd {bugs_build}

        # Build public version
        rm -rf "{public_builddir}"
        mv lava-install {public_builddir}

            make_clean = project["clean"] if "clean" in project.keys() else "",
            configure=project['configure'] if "configure" in project.keys() else "",
            make = project['make'],
            public_builddir = public_builddir,
            install = project['install'].format(install_dir=lava_installdir),
            post_install=project['post_install'] if "post_install" in project.keys() else ""

    trigger_all_crashes = join(corpdir, "trigger_crashes.sh")
    with open(trigger_all_crashes, "w") as build:
rm -rf validated_inputs.txt validated_bugs.txt

trap "echo 'CRASH'" {{3..31}}

for fname in {inputdir}; do
    # Get bug ID from filename (# after last -)
    read -ra fname_parts <<< "$fname"
    for i in ${{fname_parts[@]}}; do
    IFS=' '

    #Non-logging version
    LD_LIBRARY_PATH={librarydir2} {command2} &> /dev/null

    if [ "$code" -gt 130 ]; then # Competition version crashed, check log version
        LD_LIBRARY_PATH={librarydir} {command} &> /tmp/comp.txt
        if [ "$logcode" -lt 131 ]; then # internal version didn't crash
            echo "UNEXPECTED ERROR ($bugid): competition version exited $logcode while normal exited with $code -- Skipping";
            if grep -q "LAVALOG: $bugid" /tmp/comp.txt; then
                echo $fname >> validated_inputs.txt
                echo $bugid >> validated_bugs.txt
                echo "Competition infrastructure failed on $bugid";
done""".format(command = project['command'].format(**{"install_dir": "./lava-install-internal", "input_file": "$fname"}), # This syntax is weird but only thing that works?
            corpdir = corpdir,
            librarydir = join("./lava-install-internal", "lib"),
            librarydir2 = join("./lava-install-public", "lib"),
            command2 = project['command'].format(**{"install_dir": "./lava-install-public", "input_file": "$fname"}), # This syntax is weird but only thing that works?
            inputdir = "./inputs/*-fuzzed-*"
    os.chmod(trigger_all_crashes, (stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR | stat.S_IROTH | stat.S_IXOTH))
    # Build a version to ship in src
    run_builds([log_build_sh, public_build_sh])
    print("Injected {} bugs".format(len(real_bug_list)))

    print("Counting how many crashes competition infrastructure identifies...")
    run_cmd(trigger_all_crashes, cwd=corpdir) # Prints about segfaults
    (rv, outp) = run_cmd("wc -l {}".format(join(corpdir, "validated_bugs.txt")))
    if rv != 0:
        raise RuntimeError("Validated bugs file does not exist. Something went wrong")

    (a,b) = outp[0].split()
    n = int(a)
    print("\tCompetition infrastructure found: %d of %d injected bugs" % (n, len(real_bug_list)))
    # NOTE(review): this call and the `except` clause that follows are the
    # tail of a `try:` statement whose header is not present in this text.
    # Run the command in fbi_args with environment envv, forwarding its
    # stdout/stderr to this process's streams.
    subprocess32.check_call(fbi_args, env=envv, stdout=sys.stdout, stderr=sys.stderr)
except subprocess32.CalledProcessError as e:
    # Non-zero exit status: print the likely causes, then re-raise so the
    # failure still propagates.
    print("FBI Failed. Possible causes: \n"+
        "\tNo DUAs found because taint analysis failed: \n"
        "\t\t Ensure PANDA 'saw open of file we want to taint'\n"
        "\t\t Make sure target has debug symbols (version2): No 'failed DWARF loading' messages\n"
        "\tFBI crashed (bad arguments, config, or other untested code)")
    raise e

progress("Found Bugs, Injectable!!")

# tock() supplies the number printed as elapsed seconds below.
# presumably paired with an earlier tick() call -- not visible here.
fib_time = tock()
print("fib complete %.2f seconds" % fib_time)

db = LavaDatabase(project)

# Summary table: one row per bug type, counting Bug rows whose `type`
# equals that type's index in Bug.type_strings.
print("Count\tBug Type Num\tName")
for i in range(len(Bug.type_strings)):
    n = db.session.query(Bug).filter(Bug.type == i).count()
    print("%d\t%d\t%s" % (n, i, Bug.type_strings[i]))

# Overall row counts for the Dua, AttackPoint and Bug tables.
# NOTE(review): if this module runs under Python 2 without
# `from __future__ import print_function`, these three calls print a tuple
# such as ('total dua:', 12) -- confirm which print semantics apply.
print("total dua:", db.session.query(Dua).count())
print("total atp:", db.session.query(AttackPoint).count())
print("total bug:", db.session.query(Bug).count())
def main_thread(lock, mon, done_event):
    v0 = 2
    addstr(lock, mon, v0, 11,
           "LAVA: Large-scale Automated Vulnerability Addition", curses.A_BOLD)
    addstr(lock, mon, v0 + 1, 17, "target: %s" % target_name)

    v1 = 5
    # stage 1 -- instrument source
    # ok the add queries log file at least exists
    addstr(lock, mon, v1 + 0, 15, "1. Instrument source w/")
    addstr(lock, mon, v1 + 1, 15, "   dynamic queries & make")
    # get source lines of code
    sb.check_call(["tar", "-xf", project['tarfile'], '-C', '/tmp'])
    outp = sb.check_output(['sloccount', "/tmp/%s" % target_name])
    for line in outp.split("\n"):
        foo = re.search("^ansic:\s+([0-9]+) ", line)
        if foo:
            addstr(lock, mon, v0 + 1, 42, "sloc: " + foo.groups()[0])


    # wait for add queries to finish
    pattern = "add queries complete ([0-9\.]+) seconds"
    wait_for(pattern, add_queries_log)
    ti = extract_float(pattern, add_queries_log)
    # grab some neat stats from logfile too
    pattern = "num taint queries added ([0-9]+)"
    res = find_in_file_extract(pattern, add_queries_log)
    # tally up all the queries
    ntq = 0
    for n in res:
        ntq += int(n)
    pattern = "num atp queries added ([0-9]+)"
    res = find_in_file_extract(pattern, add_queries_log)
    natp = 0
    for n in res:
        natp += int(n)
    addstr(lock, mon, v1, 48, "taint queries: %d" % ntq)
    addstr(lock, mon, v1 + 1, 48, "  atp queries: %d" % natp)


    # stage 2 -- make
    # wait for make to finish
    pattern = "make complete ([0-9\.]+) seconds"
    wait_for(pattern, make_log)

    tm = extract_float(pattern, make_log)

    addstr(lock, mon, v1, 4, "%4.2fs" % (ti + tm))

    #    addstr(lock, mon, 9, 4, "%4.2fs" % tm)

    # stage 2 -- run instr program & record
    v2 = 8
    addstr(lock, mon, v2, 15, "2. Record run of")
    addstr(lock, mon, v2 + 1, 15, "   instrumented program")
    pattern = "panda record complete ([0-9\.]+) seconds"
    wait_for(pattern, bug_mining_log)
    tr = extract_float(pattern, bug_mining_log)
    addstr(lock, mon, v2, 4, "%4.2fs" % tr)

    # stage 3 -- replay + taint
    v3 = 11
    pattern = "Starting first and only replay"
    wait_for(pattern, bug_mining_log)
    addstr(lock, mon, v3, 15, "3. Replay with taint")
    addstr(lock, mon, v3 + 1, 15, "   propagation")

    done = False
    while not done:
        done = check_for("taint analysis complete ([0-9\.]+) seconds",
        if not done:
            logp("still not done")
        pattern = "([0-9\.]+)\%\) instr"
        if (check_for(pattern, bug_mining_log)):
            perc = extract_float(pattern, bug_mining_log)
            addstr(lock, mon, v3 + 1, 35, " %4.2f%%" % perc)
    addstr(lock, mon, v3 + 1, 35, " 100.00%")
    addstr(lock, mon, v3 + 1, 35, "        ")

    # interestiing stats
    pattern = ":\s*([0-9]+) instrs total"
    wait_for(pattern, bug_mining_log)
    ti = extract_int(pattern, bug_mining_log)
    addstr(lock, mon, v3, 48, "instr: %d" % ti)

    pattern = "taint analysis complete ([0-9\.]+) seconds"
    tt = extract_float(pattern, bug_mining_log)
    addstr(lock, mon, v3, 4, "%4.2fs" % tt)

    # figure out how big plog is
    assert os.path.isfile(plog)
    plogsize = os.stat(plog).st_size
    addstr(lock, mon, v3 + 1, 48, " plog: %d" % plogsize)


    # stage 4 -- fbi
    v4 = 16
    addstr(lock, mon, v4, 15, "4. Analyze taint & find")
    addstr(lock, mon, v4 + 1, 15, "   bug inject sites")
    # poll db to find out how many dua and atp we have
    #    first_db = True
    last_num_dua = 0
    last_num_atp = 0
    last_num_bug = 0
    done = False
    db = LavaDatabase(project)
    while not done:
        pattern = "fib complete ([0-9\.]+) seconds"
        done = check_for(pattern, bug_mining_log)
        num_dua = db.session.query(Dua).count()
        num_atp = db.session.query(AttackPoint).count()
        num_bug = db.session.query(Bug).count()
        #        if first_db and (num_dua > 0 or num_atp > 0 or num_bug > 0):
        #            addstr(lock, mon, v4, 48, "Database")
        #            first_db = False
        if num_dua != last_num_dua:
            addstr(lock, mon, v4, 48, " DUAs: %d" % num_dua)
        if num_atp != last_num_atp:
            addstr(lock, mon, v4 + 1, 48, " ATPs: %d" % num_atp)
        if num_bug != last_num_bug:
            addstr(lock, mon, v4 + 2, 48, "pBUGs: %d" % num_bug)
        last_num_dua = num_dua
        last_num_atp = num_atp
        last_num_bug = num_bug

    tf = extract_float(pattern, bug_mining_log)
    addstr(lock, mon, v4, 4, "%4.2fs" % tf)

    # stage 5 inj
    v5 = 20
    for trial in range(1, 2):
        # inject trial $trial
        lf = join(log_dir, "inject-{}.log".format(trial))
        if trial == 1:
            addstr(lock, mon, v5, 15, "5. Inject bugs &")
            addstr(lock, mon, v5 + 1, 15, "   validate")
        vt = v5 + 2 + trial
        addstr(lock, mon, vt, 15, "   trial %d (100 bugs):" % trial)

        # select bugs
        pattern = "INJECTING BUGS (.*) SOURCE"
        wait_for(pattern, lf)
        addstr(lock, mon, vt, 40, "I")

        # compile
        pattern = "ATTEMPTING BUILD (.*) INJECTED BUG"
        wait_for(pattern, lf)
        addstr(lock, mon, vt, 41, "B")

        # validate -- does orig input still exit with 0?
        pattern = "buggy program succeeds (.*) original input"
        wait_for(pattern, lf)
        addstr(lock, mon, vt, 42, "O")

        # validate bugs
        pattern = "FUZZED INPUTS"
        check_for(pattern, lf)
        wait_for(pattern, lf)
        addstr(lock, mon, vt, 43, "V")

        pattern = "yield ([0-9\.]+) \("
        wait_for(pattern, lf)
        y = extract_float(pattern, lf)
        addstr(lock, mon, vt, 40, "yield: %.2f" % y)

        pattern = "inject complete ([0-9\.]+) seconds"
        wait_for(pattern, lf)
        ti = extract_float(pattern, lf)
        addstr(lock, mon, vt, 4, "%.2fs" % ti)

        trial += 1

    last_build = db.session.query(Build).order_by(-Build.id).limit(1).one()
    terminals = []
    src_dir = join(project_dir, 'bugs', '0', target_name)
    install_dir = join(src_dir, 'lava-install')
    for bug in last_build.bugs:
        if db.session.query(Run)\
                .filter(Run.fuzzed == bug)\
                .filter(Run.build == last_build)\
                .filter(Run.exitcode.in_([134, 139, -6, -11]))\
                .count() > 0:
            unfuzzed_input = join(project_dir, 'inputs',
            suff = get_suffix(unfuzzed_input)
            pref = unfuzzed_input[:-len(suff
                                        )] if suff != "" else unfuzzed_input
            fuzzed_input = "{}-fuzzed-{}{}".format(pref, bug.id, suff)
            cmd = project['command'].format(input_file=fuzzed_input,
            script = "echo RUNNING COMMAND for bug {}:; echo; echo FUZZED INPUT {}; echo; echo -n 'md5sum '; md5sum {}; echo; echo {}; echo; echo; LD_LIBRARY_PATH={} {}; /bin/sleep 1000"\
                .format(bug.id, fuzzed_input, fuzzed_input, cmd, join(install_dir, 'lib'), cmd)
                    'gnome-terminal', '--geometry=60x24', '-x', 'bash', '-c',

        while True:
    except KeyboardInterrupt:

        sb.check_call(['killall', 'sleep'])
    except sb.CalledProcessError: