Example #1
            proof_steps[split][-1]['goal_id'] = proof_data['goal_id']
            proof_steps[split][-1]['length'] = proof_data['length']
        else:
            proof_steps[split][-1]['is_synthetic'] = False
       

if __name__ == '__main__':
    arg_parser = argparse.ArgumentParser(description='Extract the proof steps from CoqGym for training ASTactic via supervised learning')
    arg_parser.add_argument('--data_root', type=str, default='../data',
                                help='The folder for CoqGym')
    arg_parser.add_argument('--output', type=str, default='./proof_steps/',
                                help='The output directory')
    arg_parser.add_argument('--filter', type=str, help='filter the proofs')
    args = arg_parser.parse_args()
    print(args)

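    # Walk every proof in the dataset and let process_proof collect the train/valid proof steps.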
    iter_proofs(args.data_root, process_proof, include_synthetic=False, show_progress=True)

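    # Write each proof step to its own pickle file under <output>/<split>/.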
    for split in ['train', 'valid']:
        print("Proof steps {}: {}".format(split, len(proof_steps[split])))
        for i, step in enumerate(proof_steps[split]):
            dirname = os.path.join(args.output, split)
            if not os.path.exists(dirname):
                os.makedirs(dirname)
            if args.filter:
                path = os.path.join(dirname, '%s-%08d.pickle' % (args.filter, i))
            else:
                path = os.path.join(dirname, '%08d.pickle' % i)
            with open(path, 'wb') as f:
                pickle.dump(step, f)

    print('output saved to ', args.output)
Example #2
    data["num_goals"].append(len(proof_data["goals"]))
    data["num_env_constants"].append(len(proof_data["env"]["constants"]))
    data["num_env_inductives"].append(len(proof_data["env"]["inductives"]))
    data["num_env"].append(
        data["num_env_constants"][-1] + data["num_env_inductives"][-1]
    )
    # data['num_env_constants_same_file'].append(len([const for const in proof_data['env']['constants']
    #                                                          if const['qualid'].startswith('SerTop.')]))
    data["avg_size_local_context"].append(
        np.mean([len(g["hypotheses"]) for g in proof_data["goals"].values()])
    )


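# Collect per-proof statistics via process_proof, then summarize them with pandas.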
iter_proofs(
    common.data_root,
    process_proof,
    include_synthetic=args.synthetic,
    show_progress=True,
)
df = pd.DataFrame(data)
df.to_csv(args.output)
print("output saved to ", args.output)

# show some statistics
print(df.describe())
print(
    df.groupby("project").agg(
        {"name": "count", "num_steps": "mean", "num_goals": "mean", "num_env": "mean"}
    )
)
Example #3
ast_height = []
num_tokens = []
num_chars = []
has_argument = []

def process_proof(filename, proof_data):
    global ast_height
    global num_tokens
    global num_chars

    for step in proof_data['steps']:
        if step['command'][1] != 'VernacExtend':
            continue
        if not step['command'][0].endswith('.'):
            continue
        tac_str = step['command'][0][:-1]

        try:
            tree = tree_builder.transform(grammar.parser.parse(tac_str))
        except (UnexpectedCharacters, ParseError) as ex:
            continue
        
        ast_height.append(tree.height())
        num_tokens.append(tree.num_tokens())
        num_chars.append(len(tac_str))
        has_argument.append(int(tree.has_argument()))

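# Parse every tactic in the dataset and record AST height, token count, character count, and argument usage.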
iter_proofs(common.data_root, process_proof, show_progress=True)
print(np.mean(ast_height), np.mean(num_tokens), np.mean(num_chars), np.mean(has_argument))
Example #4
def count_proof(filename, proof_data):
    global abnormal_proofs
    global abnormal_env_files
    tempname = proof_data['name']
    if tempname not in term_proofs:
        return
    goal_dict = proof_data['goals']
    tempenv = proof_data['env']
    """
    for xx in tempenv['constants']:
        if xx['type'] == None:
            abnormal_env_files.add(filename)
            break
    """
    for tempgoalid in goal_dict:
        tempgoal = goal_dict[tempgoalid]
        #if tempgoal['type'] == None:
        #    abnormal_proofs.add((filename, tempname))
        for temphypo in tempgoal['hypotheses']:
            #    if temphypo['type'] == None:
            #        abnormal_proofs.add((filename, tempname))
            if len(temphypo['term']) > 1:
                special_terms_proofs.add((filename, tempname))

    proj = filename.split(os.path.sep)[2]


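# Iterate over the human-written (non-synthetic) proofs and flag those containing a hypothesis whose 'term' list has more than one entry.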
iter_proofs(common.data_root,
            count_proof,
            include_synthetic=False,
            show_progress=True)
print(special_terms_proofs)
Example #5
# get the statistics of the proofs
import argparse
import common
from utils import iter_proofs
import pandas as pd
import numpy as np
import re
import pdb

oup = open('short_proofs.txt', 'wt')


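# Print and log the first tactic of every proof that has at most two steps.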
def process_proof(filename, proof_data):
    if 1 <= len(proof_data['steps']) <= 2:
        print(proof_data['steps'][0]['command'][0])
        oup.write(proof_data['steps'][0]['command'][0] + '\n')


iter_proofs(common.data_root, process_proof)
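
All five examples share the same pattern: define a callback that receives (filename, proof_data) and pass it to iter_proofs from utils, which walks the CoqGym dataset and invokes the callback once per proof. The code below is only a minimal sketch of that assumed contract; it is not the real utils.iter_proofs, and the on-disk layout it assumes (JSON files containing a top-level 'proofs' list and an optional 'is_synthetic' flag) is a guess based on how the callbacks above use proof_data.

# Illustrative sketch only -- NOT the actual utils.iter_proofs from CoqGym.
# Assumptions (not confirmed by the examples above): proof files are JSON,
# each file holds a top-level "proofs" list, and synthetic proofs carry an
# "is_synthetic" flag.
import json
import os


def iter_proofs_sketch(data_root, callback, include_synthetic=False, show_progress=False):
    # Walk the data directory and invoke callback(filename, proof_data) per proof.
    for dirpath, _, filenames in os.walk(data_root):
        for fname in sorted(filenames):
            if not fname.endswith('.json'):
                continue
            path = os.path.join(dirpath, fname)
            with open(path) as f:
                file_data = json.load(f)
            for proof_data in file_data.get('proofs', []):
                if not include_synthetic and proof_data.get('is_synthetic', False):
                    continue
                callback(path, proof_data)
            if show_progress:
                print('processed', path)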