def build_meth_vocab(vocab_file, meth_file):
    """Count how often each argument value occurs per function/arity/slot.

    Both files are rewound with ``seek(0)`` before being read.

    ``vocab_file`` declares the table layout: a line that does not start
    with a tab names a function (its first whitespace-separated token);
    each following tab-indented line declares one signature of that
    function, and its number of whitespace-separated tokens ``n`` is used
    as the arity key.  For every declared arity, ``n`` empty counters are
    created under the keys ``-1 .. n-2``.

    ``meth_file`` supplies the observations: for every line containing a
    ``'#'``, the text before the first ``'#'`` is tokenised with
    ``t.splitLit``; the first token is the function name and the remaining
    tokens are its arguments.  Argument ``i`` is counted in slot ``i - 1``.
    Lines without ``'#'`` are ignored.

    NOTE(review): slots are keyed from -1 rather than 0, so the first
    argument lands in slot -1 -- presumably it plays a special role
    (receiver/return value?); confirm against the data format.

    Returns:
        dict: ``vocab[func][arity][slot][arg_value] -> occurrence count``.

    Raises:
        KeyError: if a call in ``meth_file`` uses a function or arity that
            ``vocab_file`` did not declare, or if ``vocab_file`` starts
            with a tab-indented line.
    """
    vocab_file.seek(0)
    meth_file.seek(0)

    vocab = {}
    last_func = ""
    for line in vocab_file:
        if line[0] != '\t':
            # Function header line: start a fresh arity table for it.
            last_func = line.split()[0]
            vocab[last_func] = {}
        else:
            # Signature line: one empty counter per slot, keyed -1 .. n-2.
            n = len(line.split())
            vocab[last_func][n] = {i - 1: {} for i in range(n)}

    for line in meth_file:
        parts = line.split('#')
        if len(parts) <= 1:
            continue
        call = t.splitLit(parts[0])
        func = call[0]
        args = call[1:]
        # Parenthesised form prints the same text on Python 2 and is also
        # valid syntax on Python 3 (the bare `print call` statement is not).
        print(call)
        n = len(args)
        for i, arg in enumerate(args):
            # Looked up per argument so that zero-argument calls never
            # touch (and never require) an arity-0 entry.
            counts = vocab[func][n][i - 1]
            counts[arg] = counts.get(arg, 0) + 1
    return vocab
def getVarLines(var_file):
    """Parse ``var_file`` into a list of sentences of variable statements.

    The file is rewound with ``seek(0)`` and read line by line:

    * A line starting with ``'<'`` is a marker; it is stored as a string
      with its trailing newline removed.  The marker ``<END>`` closes the
      current sentence.
    * Any other line is split on ``'|'``.  The text before the first
      ``'|'`` is tokenised with ``t.splitLit``; the second field is parsed
      as whitespace-separated integers.  The line is stored as
      ``(func, n, inf)`` where ``func`` is the first token, ``n`` the
      number of remaining tokens and ``inf`` the tuple of integers.

    Entries read after the last ``<END>`` marker are not returned.

    Returns:
        list: one list per sentence, holding marker strings and
        ``(func, n, inf)`` tuples in file order.

    Raises:
        IndexError: if a non-marker line contains no ``'|'``.
        ValueError: if the second ``'|'`` field holds a non-integer token.
    """
    var_file.seek(0)
    sents = []
    sent = []
    for line in var_file:
        if line[0] == '<':
            # The last line of a file may lack its newline; only strip one
            # when present so the marker is never truncated and a final
            # "<END>" still closes its sentence.
            marker = line[:-1] if line.endswith('\n') else line
            sent.append(marker)
            if marker == "<END>":
                sents.append(sent)
                sent = []
            continue

        parts = line.split('|')
        inf = tuple(int(tok) for tok in parts[1].split())
        call = t.splitLit(parts[0])
        sent.append((call[0], len(call) - 1, inf))
    return sents
def getReducedLines(meth_file):
    """Parse ``meth_file`` into a list of sentences of reduced calls.

    The file is rewound with ``seek(0)`` and read line by line:

    * A line starting with ``'<'`` is a marker; it is stored as
      ``(marker, ())`` with the trailing newline removed.  The marker
      ``<END>`` closes the current sentence.
    * Any other line is split on ``'#'``.  The text before the first
      ``'#'`` is tokenised with ``t.splitLit``; the second field is split
      on whitespace and kept as a tuple of strings.  The line is stored as
      ``((func, n), inf)`` where ``func`` is the first token, ``n`` the
      number of remaining tokens and ``inf`` the tuple of strings.

    Entries read after the last ``<END>`` marker are not returned.

    Returns:
        list: one list per sentence; every element is a 2-tuple whose
        first item is either a marker string or a ``(func, n)`` pair.

    Raises:
        IndexError: if a non-marker line contains no ``'#'``.
    """
    meth_file.seek(0)
    sents = []
    sent = []
    for line in meth_file:
        if line[0] == '<':
            # The last line of a file may lack its newline; only strip one
            # when present so the marker is never truncated and a final
            # "<END>" still closes its sentence.
            marker = line[:-1] if line.endswith('\n') else line
            sent.append((marker, ()))
            if marker == "<END>":
                sents.append(sent)
                sent = []
            continue

        parts = line.split('#')
        inf = tuple(parts[1].split())
        call = t.splitLit(parts[0])
        sent.append(((call[0], len(call) - 1), inf))
    return sents