def make_eq(q, a, equations):
    """Score candidate equations per problem and write global training rows.

    For every problem in ``q`` the candidate equations are fetched with
    ``utils.get_k_eqs(equations[k], g=True)``. All candidates whose first
    field is 1 are kept; candidates whose first field is 0 are truncated to
    the same count. A candidate is processed only when its non-operator
    tokens equal, in order, the cleaned numbers extracted from the parsed
    story. Such a candidate is reduced one binary operation at a time via
    ``compute`` and the two resulting sides are handed to ``training`` to
    build one output row.

    Args:
        q: Sequence of problem texts, indexed in parallel with ``equations``.
        a: Unused; kept so existing callers keep working.
        equations: Per-problem identifiers, passed to ``utils.get_k_eqs`` and
            (after ``int()``) to ``utils.read_parse``.

    Side effects:
        Prints progress information and writes
        ``data/<c>.global.data``, where ``<c>`` is the last character of
        ``sys.argv[1]``. Each row is written as its first element followed by
        1-based ``index:value`` pairs for the remaining elements. Calls
        ``exit()`` if ``compute`` returns -1.
    """
    non_operands = ('(', ')', '+', '-', '/', '*', '=')
    tdata = []
    wps = q  # open(q).readlines()
    for k, wp in enumerate(wps):
        print(k, equations[k])
        answers = utils.get_k_eqs(equations[k], g=True)
        good = list({x for x in answers if x[0] == 1})
        # The 0-flagged candidates are truncated (not sampled) to len(good).
        bad = list({x for x in answers if x[0] == 0})[:len(good)]
        answers = good + bad
        if not answers:
            continue
        answers = list(set(answers))

        # First preprocessing, tokenize slightly
        problem = utils.preprocess_problem(wp)
        print(problem)

        # make story
        story = utils.read_parse(int(equations[k]))
        sets = makesets.makesets(story['sentences'])

        # TODO look for 2 xes
        if not [idx for idx, x in enumerate(sets) if x[1].num == 'x']:
            print("NO X WHY")
            continue

        numlist = [(utils.cleannum(v.num), v) for _, v in sets]
        numlist = [x for x in numlist if x[0] != '']
        objs = {num: (0, v) for num, v in numlist}

        # Every operand of the first candidate must be a known entity.
        consts = [
            x for x in answers[0][1].split(" ") if x not in non_operands
        ]
        present = [x for x in consts if x in objs]
        if present != consts:
            print(present, consts)
            print("missing thing")
            continue

        num_order = [x[0] for x in numlist]
        for j, eq, cons in answers:
            consts = [x for x in eq.split(" ") if x not in non_operands]
            order = int(consts == num_order)
            if order == 0:
                continue
            print(j, eq)
            l, r = [x.strip().split(' ') for x in eq.split('=')]

            target = ('x', objs['x'])

            # Repeatedly collapse the innermost binary operation on each
            # side until a single token remains.
            sides = []
            thisscore = []
            for compound in (l, r):
                while len(compound) > 1:
                    if "(" in compound:
                        # Innermost group: last "(" and the first ")" after it.
                        rpidx = (len(compound) - 1) - compound[::-1].index('(')
                        lpidx = rpidx + compound[rpidx:].index(")")
                        subeq = compound[rpidx + 1:lpidx]
                        substr = "(" + ''.join(subeq) + ")"
                        # Single expression on purpose: the tail must be
                        # sliced from the list as it was *before* truncation,
                        # otherwise everything after ")" is lost.
                        compound = (
                            compound[:rpidx] + [substr] + compound[lpidx + 1:]
                        )
                    else:
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    p, op, e = subeq
                    p = objs[p]
                    e = objs[e]
                    op = op.strip()
                    pute = compute(p, op, e, target, problem, story, order)
                    # Register the collapsed token so later steps can look it up.
                    objs[substr] = pute
                    if pute == -1:
                        exit()
                    score, c, vals = pute
                    thisscore.append(score)
                sides.append(objs[compound[0]])

            score = 1
            for s in thisscore:
                score *= s
            tdata.append(
                training(sides[0], sides[1], problem, story, target, j, order,
                         score, cons))

    with open("data/" + sys.argv[1][-1] + ".global.data", 'w') as f:
        for v in tdata:
            f.write(str(v[0]) + " ")
            for i, j in enumerate(v[1:]):
                f.write(str(i + 1) + ":" + str(j) + " ")
            f.write("\n")
def make_eq(q, a, equations):
    """Rank candidate equations per problem and count top-1 hits and misses.

    For every problem in ``q`` the candidates come from
    ``utils.get_k_eqs(equations[k], g=True, a=True)`` as 4-tuples
    ``(j, eq, cons, guess)``. Candidates are de-duplicated by equation text.
    A candidate is scored only when its non-operator tokens equal, in order,
    the cleaned numbers extracted from the parsed story. Each binary
    operation is scored via ``compute``; the per-operation scores are
    multiplied together and then multiplied by the first element of the
    ``'='`` ``compute`` result for the two sides.

    Args:
        q: Sequence of problem texts, indexed in parallel with ``equations``.
        a: Unused; kept so existing callers keep working.
        equations: Per-problem identifiers, passed to ``utils.get_k_eqs`` and
            (after ``int()``) to ``utils.read_parse``.

    Returns:
        ``(right, wrong)``: the number of problems whose highest-sorted
        candidate has ``j == 1``, and the number of scored problems where
        that is not the case. Problems skipped early (no candidates, no
        ``'x'`` entity, operand/entity mismatch) count towards neither.

    Side effects:
        Prints diagnostics; calls ``exit()`` if ``compute`` returns -1.
    """
    non_operands = ('(', ')', '+', '-', '/', '*', '=')
    wps = q  # open(q).readlines()
    right = 0
    wrong = 0
    for k, wp in enumerate(wps):
        answers = utils.get_k_eqs(equations[k], g=True, a=True)
        if answers == []:
            continue

        # Keep only the first candidate seen for each equation string.
        seen_eqs = set()
        unique = []
        for x in answers:
            if x[1] not in seen_eqs:
                seen_eqs.add(x[1])
                unique.append(x)
        answers = list(set(unique))

        # First preprocessing, tokenize slightly
        problem = utils.preprocess_problem(wp)
        print(problem)

        # make story
        story = utils.read_parse(int(equations[k]))
        sets = makesets.makesets(story['sentences'])
        for x in sets:
            x[1].details()

        # TODO look for 2 xes
        if not [idx for idx, x in enumerate(sets) if x[1].num == 'x']:
            print("NO X WHY")
            continue

        numlist = [(utils.cleannum(v.num), v) for _, v in sets]
        numlist = [x for x in numlist if x[0] != '']
        objs = {num: (0, v) for num, v in numlist}
        print(objs.items())

        # Operands of the first candidate and known entities must match
        # in both directions.
        consts = [
            x for x in answers[0][1].split(" ") if x not in non_operands
        ]
        present = [x for x in consts if x in objs]
        if consts != present:
            print(present, consts)
            print("missing thing")
            continue
        if any(key not in consts for key in objs):
            print("missing thing")
            continue

        num_order = [x[0] for x in numlist]
        scores = []
        for j, eq, cons, guess in answers:
            consts = [x for x in eq.split(" ") if x not in non_operands]
            order = int(consts == num_order)
            if order == 0:
                continue
            l, r = [x.strip().split(' ') for x in eq.split('=')]

            target = ('x', objs['x'])

            # Repeatedly collapse the innermost binary operation on each
            # side until a single token remains.
            sides = []
            thisscore = []
            for compound in (l, r):
                while len(compound) > 1:
                    if "(" in compound:
                        # Innermost group: last "(" and the first ")" after it.
                        rpidx = (len(compound) - 1) - compound[::-1].index('(')
                        lpidx = rpidx + compound[rpidx:].index(")")
                        subeq = compound[rpidx + 1:lpidx]
                        substr = "(" + ''.join(subeq) + ")"
                        # Single expression on purpose: the tail must be
                        # sliced from the list as it was *before* truncation,
                        # otherwise everything after ")" is lost.
                        compound = (
                            compound[:rpidx] + [substr] + compound[lpidx + 1:]
                        )
                    else:
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    p, op, e = subeq
                    p = objs[p]
                    e = objs[e]
                    op = op.strip()
                    pute = compute(p, op, e, target, problem, story, order)
                    # Register the collapsed token so later steps can look it up.
                    objs[substr] = pute
                    if pute == -1:
                        exit()
                    score, c, vals = pute
                    thisscore.append(score)
                sides.append(objs[compound[0]])

            p = sides[0]
            e = sides[1]
            score = 1
            for s in thisscore:
                score *= s
            score *= compute(
                p, '=', e, target, problem, story, order, score, cons
            )[0]
            scores.append((score, j, eq, guess))

        scores = sorted(scores, reverse=True)
        print(scores[:3])
        if not any(x[1] == 1 for x in scores):
            wrong += 1
            print("TOP SCORING NO CORRECT SOLUTION \nINCORRECT")
            continue
        # A correct candidate exists, so ``scores`` is non-empty here.
        if scores[0][1] == 1:
            right += 1
            print("CORRECT")
        else:
            wrong += 1
            print("INCORRECT")
    return (right, wrong)
def make_eq(q, a, equations):
    """Collect per-operator training examples and pickle them to disk.

    For each problem in ``q`` the equations returned by
    ``utils.get_k_eqs(equations[k])`` whose first field is 1 are broken down
    into ``(op, left, right)`` triples, one per binary operation, innermost
    parentheses first. The triples are passed to ``training`` and its
    per-operator results are appended to the accumulated examples.

    Args:
        q: Sequence of problem texts, indexed in parallel with ``equations``.
        a: Unused.
        equations: Per-problem identifiers, passed to ``utils.get_k_eqs`` and
            (after ``int()``) to ``utils.read_parse``.

    Side effects:
        Prints diagnostics, calls ``exit()`` when an operand of the first
        equation is not among the extracted entities, and pickles the
        accumulated examples to ``data/<c>.local.training`` where ``<c>`` is
        the last character of ``sys.argv[1]``.
    """
    structural_tokens = ['(', ')', '+', '-', '/', '*', '=', ]

    bigtexamples = {}
    for symbol in ["+", "*", '/', '-', '=']:
        bigtexamples[symbol] = ([], [])

    wps = q  # open(q).readlines()
    for k, raw_text in enumerate(wps):
        # First preprocessing, tokenize slightly
        problem = utils.preprocess_problem(raw_text)
        print(k)
        print(problem)

        story = utils.read_parse(int(equations[k]))
        eqs = utils.get_k_eqs(equations[k])
        answers = []
        for candidate in eqs:
            if candidate[0] == 1:
                answers.append(candidate[1])
        if len(answers) == 0:
            continue
        answers = list(set(answers))
        print(story["sentences"][0]["text"])
        print(answers)

        # make story
        sets = makesets.makesets(story['sentences'])
        unknown_positions = [
            pos for pos, entry in enumerate(sets) if entry[1].num == 'x'
        ]
        if len(unknown_positions) == 0:
            print("NO X WHY")
            continue

        # Pair every entity with its cleaned number, dropping blanks.
        numlist = []
        for _, member in sets:
            cleaned = utils.cleannum(member.num)
            if cleaned != '':
                numlist.append((cleaned, member))

        objs = {}
        for cleaned, member in numlist:
            objs[cleaned] = (0, member)
        print(objs.items())

        consts = []
        for token in answers[0].split(" "):
            if token not in structural_tokens:
                consts.append(token)
        present = [token for token in consts if token in objs]
        if present != consts:
            print(present, consts)
            print("missing thing")
            exit()

        for j, eq in enumerate(answers):
            trips = []
            print(j, eq)
            l, r = [side.strip().split(' ') for side in eq.split('=')]

            target = ('x', objs['x'])

            # find innermost parens?
            for compound in (l, r):
                while len(compound) > 1:
                    if "(" in compound:
                        # Last "(" and the first ")" at or after it.
                        open_at = max(
                            pos for pos, token in enumerate(compound)
                            if token == '('
                        )
                        close_at = compound.index(")", open_at)
                        subeq = compound[open_at + 1:close_at]
                        substr = "(%s)" % ''.join(subeq)
                        head = compound[:open_at]
                        tail = compound[close_at + 1:]
                        compound = head + [substr] + tail
                    else:
                        subeq = compound[0:3]
                        substr = "(%s)" % ''.join(subeq)
                        compound = [substr] + compound[3:]

                    left_key, op, right_key = subeq
                    p = objs[left_key]
                    e = objs[right_key]
                    op = op.strip()
                    trips.append((op, p, e))
                    pute = (0, makesets.combine(p[1], e[1], op))
                    # Register the collapsed token so later steps can look it up.
                    objs[substr] = pute
                    # ``pute`` was just built as a tuple, so this never fires.
                    if pute == -1:
                        exit()

            t = training(trips, problem, story, target)
            for op in t:
                bucket = bigtexamples[op]
                bucket[0].extend(t[op][0])
                bucket[1].extend(t[op][1])

    out_path = 'data/' + sys.argv[1][-1] + ".local.training"
    with open(out_path, 'wb') as f:
        pickle.dump(bigtexamples, f)
def make_eq(q, a, equations):
    """Build local (per-operator) training examples and pickle them.

    Walks the problems in ``q``; for each one, takes the equations from
    ``utils.get_k_eqs(equations[k])`` whose first field is 1 and reduces each
    side of every equation to a single token, recording an
    ``(op, left, right)`` triple for every binary operation collapsed along
    the way. ``training`` turns the triples into per-operator examples that
    are appended to the running totals.

    Args:
        q: Sequence of problem texts, indexed in parallel with ``equations``.
        a: Unused.
        equations: Per-problem identifiers, passed to ``utils.get_k_eqs`` and
            (after ``int()``) to ``utils.read_parse``.

    Side effects:
        Prints diagnostics; calls ``exit()`` if the first equation mentions
        an operand with no matching extracted entity; writes the pickled
        totals to ``data/<c>.local.training`` where ``<c>`` is the last
        character of ``sys.argv[1]``.
    """
    bigtexamples = {sym: ([], []) for sym in ("+", "*", '/', '-', '=')}
    ignored = ('(', ')', '+', '-', '/', '*', '=')

    wps = q  # open(q).readlines()
    for k in range(len(wps)):
        # First preprocessing, tokenize slightly
        problem = utils.preprocess_problem(wps[k])
        print(k)
        print(problem)

        story = utils.read_parse(int(equations[k]))
        eqs = utils.get_k_eqs(equations[k])
        answers = [item[1] for item in eqs if item[0] == 1]
        if not answers:
            continue
        answers = list(set(answers))
        print(story["sentences"][0]["text"])
        print(answers)

        # make story
        sets = makesets.makesets(story['sentences'])
        x_hits = [n for n, item in enumerate(sets) if item[1].num == 'x']
        if x_hits == []:
            print("NO X WHY")
            continue

        cleaned_pairs = [(utils.cleannum(ent.num), ent) for _, ent in sets]
        numlist = [pair for pair in cleaned_pairs if pair[0] != '']
        objs = dict((num, (0, ent)) for num, ent in numlist)
        print(objs.items())

        consts = [
            tok for tok in answers[0].split(" ") if tok not in ignored
        ]
        present = [tok for tok in consts if tok in objs]
        if not present == consts:
            print(present, consts)
            print("missing thing")
            exit()

        for j, eq in enumerate(answers):
            trips = []
            print(j, eq)
            halves = [half.strip().split(' ') for half in eq.split('=')]
            l, r = halves

            target = 'x'
            target = (target, objs[target])

            # find innermost parens?
            for compound in halves:
                while len(compound) > 1:
                    if compound.count("(") > 0:
                        # Scan backwards for the last "(".
                        rpidx = len(compound) - 1
                        while compound[rpidx] != '(':
                            rpidx -= 1
                        # First ")" at or after that position.
                        lpidx = compound.index(")", rpidx)
                        subeq = compound[rpidx + 1:lpidx]
                        substr = "(" + ''.join(subeq) + ")"
                        rebuilt = list(compound[:rpidx])
                        rebuilt.append(substr)
                        rebuilt.extend(compound[lpidx + 1:])
                        compound = rebuilt
                    else:
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]

                    p, op, e = subeq
                    p, e = objs[p], objs[e]
                    op = op.strip()
                    trips.append((op, p, e))
                    combined = makesets.combine(p[1], e[1], op)
                    pute = (0, combined)
                    # Store under the collapsed token for later lookups.
                    objs[substr] = pute
                    # ``pute`` is always a tuple here, so this is never true.
                    if pute == -1:
                        exit()

            t = training(trips, problem, story, target)
            for op in t:
                firsts, seconds = bigtexamples[op]
                firsts.extend(t[op][0])
                seconds.extend(t[op][1])

    with open('data/' + sys.argv[1][-1] + ".local.training", 'wb') as f:
        pickle.dump(bigtexamples, f)