'divide': ['/', '/='], 'modulus': ['%'] } pluses = ['+', '+=', '++'] minuses = ['-', '-=', '--'] times = ['*', '*='] divides = ['/', '/='] FOR = '/for/block/' WHILE = '/while/block/' IF = '/if/' FUNCTION = '/function' NAME = '/name' RETURN = '/block/return' divs = config.get_div() if 'DivAll' in divs: data_dir = 'data-all/' elif 'Div1' in divs: data_dir = 'data-div-1/' elif 'Div2' in divs: data_dir = 'data-div-2/' # data_dir = 'data-all/' def all_submissions(): df = pd.DataFrame(columns=cols) print(len(df.columns)) feature_set = {} submission_set = {}
# print(len(curr_feats)) return curr_feats operations = ['+','-','*','/','%','+=','-=','*=','/=','++','--'] pluses = ['+', '+=', '++'] minuses = ['-', '-=', '--'] times = ['*', '*='] divides = ['/', '/='] FOR='/for/block/' WHILE='/while/block/' IF='/if/block' FUNCTION='/function' NAME='/name' RETURN='/block/return' divs = config.get_div() if 'DivAll' in divs: data_dir = 'data-all/' elif 'Div1' in divs: data_dir = 'data-div-1/' elif 'Div2' in divs: data_dir = 'data-div-2/' # data_dir = 'data-all/' def all_submissions(): feature_set = {} submission_set = {} idx = 1 for contest in next(os.walk(data_dir))[1]:
import os.path import shutil import config ''' author: tarek date: Aug 23rd 2015 desc: parses submissions.txt looking for contest number, then problems index to groups cpp submissions together in the data dir ''' div = config.get_div() if 'grading' in div: s_file = 'Submissions-grading.txt' data_folder = 'data-grading/' elif 'DivAll' in div: s_file = "DivAll-Submissions.txt" data_folder = 'data-all/' elif 'Div1' in div: s_file = 'Div1-Submissions.txt' data_folder = 'data-div-1/' else: s_file = "Div2-Submissions.txt" data_folder = 'data-div-2/' if 'grading' not in div: s_file = 'dataset/pt_sub/' + s_file src_dir = 'source-code' else: src_dir = 'source-code-grading' if not os.path.exists(data_folder):
import os.path import shutil import config ''' author: tarek date: Aug 23rd 2015 desc: parses submissions.txt looking for contest number, then problems index to groups cpp submissions together in the data dir ''' div = config.get_div() if 'DivAll' in div: s_file = "DivAll-Submissions.txt" data_folder = 'data-all/' elif 'Div1' in div: s_file = 'Div1-Submissions.txt' data_folder = 'data-div-1/' else: s_file = "Div2-Submissions.txt" data_folder = 'data-div-2/' if not os.path.exists(data_folder): os.makedirs(data_folder) with open(s_file) as f: contest = '' index = '' for line in f: if line != "\n": arr_line = line.split("\n")[0].split(" ") if not contest: