def make_eq(q, a, VERBOSE, TRAIN):
    """Parse every word problem in ``q`` and pass its sets to ``EF.main``.

    Each problem is lightly tokenized (a trailing ``,``, ``.`` or ``?`` is
    split off its word and the text is padded with one space on each side),
    parsed with ``nlp.parse`` and turned into sets by ``makesets.makesets``.
    The sets, the problem index, ``a[k]`` and ``sys.argv[1]`` go to
    ``EF.main``.  Afterwards the sets whose ``num`` passes
    ``makesets.floatcheck`` or equals ``'x'`` are printed in detail.

    Args:
        q: Indexable sequence of problem strings.
        a: Sequence indexed in parallel with ``q``.
        VERBOSE: When truthy, each iteration lists all problems and reads
            the index to process from standard input.
        TRAIN: Not used by this function.
    """
    wps = q
    punctuation = (',', '.', '?')
    for k in range(len(wps)):
        if VERBOSE:
            # Interactive mode: show the menu and let the user pick the index.
            for idx, text in enumerate(wps):
                print(idx, text)
            k = int(input())
        print(k)

        # First preprocessing, tokenize slightly.
        words = [
            w[:-1] + " " + w[-1] if w.endswith(punctuation) else w
            for w in wps[k].strip().split(" ")
        ]
        problem = " " + " ".join(words) + " "
        print(problem)

        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        EF.main(sets, k, a[k], sys.argv[1])

        # Keep only sets carrying a number or the unknown 'x'.
        kept = []
        for entry in sets:
            if makesets.floatcheck(entry[1].num) or entry[1].num == 'x':
                kept.append(entry)
        print(kept)
        for entry in kept:
            entry[1].details()
def make_eq(q, a, VERBOSE, TRAIN, fold):
    """Lower-case, tokenize and parse each problem, then call ``EF.main``.

    Args:
        q: Indexable sequence of problem strings.
        a: Sequence indexed in parallel with ``q``; ``a[k]`` is forwarded
            to ``EF.main``.
        VERBOSE: When truthy, each iteration lists all problems and reads
            the index to process from standard input.
        TRAIN: Not used by this function.
        fold: Not used by this function.
    """
    wps = q
    punctuation = (',', '.', '?')
    for k in range(len(wps)):
        if VERBOSE:
            for idx, text in enumerate(wps):
                print(idx, text)
            k = int(input())
        print(k)

        # First preprocessing, tokenize slightly: detach trailing
        # punctuation from its word and pad the text with spaces.
        pieces = []
        for word in wps[k].lower().strip().split(" "):
            if word.endswith(punctuation):
                word = word[:-1] + " " + word[-1]
            pieces.append(word)
        problem = " " + " ".join(pieces) + " "
        print(problem)

        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        EF.main(sets, k, a[k])
def make_eq(q, a, VERBOSE, TRAIN):
    """Print index, text, answer and detected sets for each problem.

    Args:
        q: Indexable sequence of problem strings.
        a: Sequence of answer strings indexed in parallel with ``q``.
        VERBOSE: When truthy, each iteration lists all problems and reads
            the index to process from standard input.
        TRAIN: Not used by this function.
    """
    wps = q
    punctuation = (',', '.', '?')
    for k in range(len(wps)):
        if VERBOSE:
            for idx, text in enumerate(wps):
                print(idx, text)
            k = int(input())
        print("Index: " + str(k))

        # First preprocessing, tokenize slightly.
        words = [
            w[:-1] + " " + w[-1] if w.endswith(punctuation) else w
            for w in wps[k].strip().split(" ")
        ]
        problem = " " + " ".join(words) + " "
        print("Problem: " + problem)
        print("Answer: " + a[k].strip())

        story = nlp.parse(problem)
        parsed = makesets.makesets(story['sentences'])
        # Keep only sets carrying a number or the unknown 'x'.
        sets = []
        for entry in parsed:
            if makesets.floatcheck(entry[1].num) or entry[1].num == 'x':
                sets.append(entry)
        # Collected as in the original; not used afterwards in this function.
        ents = [entry[1].entity for entry in sets]
        print("--- SETS ---")
        for entry in sets:
            entry[1].details()
def make_eq(q, a, VERBOSE, TRAIN):
    """Build sets from cached parses, pickle them and call ``EF.main``.

    For each problem the text is tokenized and printed, but the parse is
    loaded with ``read_parse(k)`` rather than computed from the text.  The
    sets returned by ``makesets.makesets`` are pickled to
    ``madesets/<k>.pickle`` and then passed to ``EF.main`` together with
    ``k``, ``a[k]`` and ``sys.argv[1]``.

    Args:
        q: Indexable sequence of problem strings.
        a: Sequence indexed in parallel with ``q``.
        VERBOSE: When truthy, each iteration lists all problems and reads
            the index to process from standard input.
        TRAIN: Not used by this function.
    """
    wps = q
    punctuation = (',', '.', '?')
    for k in range(len(wps)):
        if VERBOSE:
            for idx, text in enumerate(wps):
                print(idx, text)
            k = int(input())
        print(k)

        # First preprocessing, tokenize slightly (only used for display).
        words = [
            w[:-1] + " " + w[-1] if w.endswith(punctuation) else w
            for w in wps[k].strip().split(" ")
        ]
        problem = " " + " ".join(words) + " "
        print(problem)

        story = read_parse(k)
        sets = makesets.makesets(story['sentences'])
        # Close the pickle file deterministically; the original left one
        # open handle behind per problem.
        with open('madesets/' + str(k) + '.pickle', 'wb') as fh:
            pickle.dump(sets, fh)
        EF.main(sets, k, a[k], sys.argv[1])

        sets = [
            s for s in sets
            if makesets.floatcheck(s[1].num) or s[1].num == 'x'
        ]
        print(sets)
        for s in sets:
            s[1].details()
def make_eq(q, a, VERBOSE, TRAIN):
    """Run ``EF.main`` on the cached parse of every problem in ``q``.

    The problem text is normalized with ``utils.preprocess_problem`` (for
    display only); the parse comes from ``utils.read_parse(k)``.

    Args:
        q: Indexable sequence of problem strings.
        a: Sequence indexed in parallel with ``q``.
        VERBOSE: When truthy, each iteration lists all problems and reads
            the index to process from standard input.
        TRAIN: Not used by this function.
    """
    wps = q
    for k in range(len(wps)):
        if VERBOSE:
            for idx, text in enumerate(wps):
                print(idx, text)
            k = int(input())
        print(k)

        # First preprocessing, tokenize slightly.
        print(utils.preprocess_problem(wps[k]))

        parsed = utils.read_parse(k)
        sets = makesets.makesets(parsed['sentences'])
        EF.main(sets, k, a[k], sys.argv[1])

        # Keep only sets carrying a number or the unknown 'x'.
        kept = []
        for entry in sets:
            if makesets.floatcheck(entry[1].num) or entry[1].num == 'x':
                kept.append(entry)
        print(kept)
        for entry in kept:
            entry[1].details()
def make_eq(q, a, VERBOSE, TRAIN):
    """Feed the sets of each problem to ``EF.main`` and print the numeric ones.

    Args:
        q: Indexable sequence of problem strings.
        a: Sequence indexed in parallel with ``q``; ``a[k]`` is forwarded
            to ``EF.main``.
        VERBOSE: When truthy, each iteration lists all problems and reads
            the index to process from standard input.
        TRAIN: Not used by this function.
    """
    wps = q
    total = len(wps)
    for k in range(total):
        if VERBOSE:
            # Interactive mode: print the menu, then read the chosen index.
            for position in range(total):
                print(position, wps[position])
            k = int(input())
        print(k)

        # First preprocessing, tokenize slightly (printed, not parsed).
        problem = utils.preprocess_problem(wps[k])
        print(problem)

        story = utils.read_parse(k)
        sets = makesets.makesets(story['sentences'])
        EF.main(sets, k, a[k], sys.argv[1])

        def has_value(entry):
            # True for sets holding a float-like number or the unknown 'x'.
            return makesets.floatcheck(entry[1].num) or entry[1].num == 'x'

        sets = [entry for entry in sets if has_value(entry)]
        print(sets)
        for entry in sets:
            entry[1].details()
def make_eq(q, a, VERBOSE, TRAIN):
    """Read problems from file ``q``, normalize them and call ``EF.main``.

    Each line of ``q`` is lower-cased, lightly tokenized, has spelled-out
    numbers replaced by digits, and is parsed with ``nlp.parse``; the sets
    built by ``makesets.makesets`` are passed to ``EF.main(sets, k)``.

    As in the original, ``somethingWrongProblems`` is always opened for
    writing and ``<q>.out.txt`` is opened when neither ``TRAIN`` nor
    ``VERBOSE`` is set.  Nothing in this function writes to them, but both
    files are created or truncated.  All files are now closed on exit.

    Args:
        q: Path of the text file holding one problem per line.
        a: Not used by this function.
        VERBOSE: When truthy, each iteration lists all problems and reads
            the index to process from standard input.
        TRAIN: Only influences whether ``<q>.out.txt`` is created.
    """
    import contextlib

    with open(q) as src:
        wps = src.readlines()

    replacements = {
        ' two ': ' 2 ', " three ": ' 3 ', ' four ': ' 4 ', ' five ': ' 5 ',
        ' six ': ' 6 ', ' seven ': ' 7 ', ' eight ': ' 8 ', ' nine ': ' 9 ',
        ' ten ': ' 10 ', ' eleven ': ' 11 ', ' twice ': ' 2 ',
    }
    punctuation = (',', '.', '?')

    with contextlib.ExitStack() as stack:
        # Same open order and modes as before; the stack guarantees closing.
        if not TRAIN and not VERBOSE:
            stack.enter_context(open(q + ".out.txt", 'w'))
        stack.enter_context(open('somethingWrongProblems', 'w'))

        for k in range(len(wps)):
            if VERBOSE:
                for idx, text in enumerate(wps):
                    print(idx, text)
                k = int(input())
            print(k)

            # First preprocessing, tokenize slightly.
            words = [
                w[:-1] + " " + w[-1] if w.endswith(punctuation) else w
                for w in wps[k].lower().strip().split(" ")
            ]
            problem = " " + " ".join(words) + " "
            print(problem)

            # Spelled-out numbers -> digits (keys are space-delimited words).
            for word, digit in replacements.items():
                problem = problem.replace(word, digit)

            story = nlp.parse(problem)
            sets = makesets.makesets(story['sentences'])
            EF.main(sets, k)
def make_eq(q, a, VERBOSE, TRAIN):
    """Normalize each problem, call ``EF.main`` and print the numeric sets.

    Problems are lower-cased, lightly tokenized and have spelled-out
    numbers (``two`` .. ``eleven``, ``twice``) replaced by digits before
    being parsed with ``nlp.parse``.

    Args:
        q: Indexable sequence of problem strings.
        a: Sequence indexed in parallel with ``q``; ``a[k]`` is forwarded
            to ``EF.main``.
        VERBOSE: When truthy, each iteration lists all problems and reads
            the index to process from standard input.
        TRAIN: Not used by this function.
    """
    wps = q
    replacements = {
        ' two ': ' 2 ',
        " three ": ' 3 ',
        ' four ': ' 4 ',
        ' five ': ' 5 ',
        ' six ': ' 6 ',
        ' seven ': ' 7 ',
        ' eight ': ' 8 ',
        ' nine ': ' 9 ',
        ' ten ': ' 10 ',
        ' eleven ': ' 11 ',
        ' twice ': ' 2 ',
    }
    punctuation = (',', '.', '?')
    for k in range(len(wps)):
        if VERBOSE:
            for idx, text in enumerate(wps):
                print(idx, text)
            k = int(input())
        print(k)

        # First preprocessing, tokenize slightly.
        words = [
            w[:-1] + " " + w[-1] if w.endswith(punctuation) else w
            for w in wps[k].lower().strip().split(" ")
        ]
        problem = " " + " ".join(words) + " "
        print(problem)  # printed before number words are replaced

        for word, digit in replacements.items():
            problem = problem.replace(word, digit)

        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        EF.main(sets, k, a[k])

        kept = []
        for entry in sets:
            if makesets.floatcheck(entry[1].num) or entry[1].num == 'x':
                kept.append(entry)
        print(kept)
        # Collected as in the original; not used afterwards in this function.
        ents = [entry[1].entity for entry in kept]
        for entry in kept:
            entry[1].details()
def make_eq(q, a, e, VERBOSE, TRAIN):
    """Build sets per problem, drop flagged quantities, pickle, call ``EF.main``.

    ``e[k]`` is a space-separated list of ``ent,num,flag`` items (the flag
    is the item's last character).  Every item flagged ``'0'`` whose entity
    is not ``$``/``dollar`` removes from the problem's sets all sets whose
    ``num`` equals the item's ``num``.  The remaining sets are pickled to
    ``madesets/<k>.pickle`` and passed to ``EF.main``.

    Args:
        q: Indexable sequence of problem strings.
        a: Sequence indexed in parallel with ``q``.
        e: Sequence of relevance strings indexed in parallel with ``q``.
        VERBOSE: When truthy, each iteration lists all problems and reads
            the index to process from standard input.
        TRAIN: Not used by this function.
    """
    wps = q
    punctuation = (',', '.', '?')
    for k in range(len(wps)):
        if VERBOSE:
            for idx, text in enumerate(wps):
                print(idx, text)
            k = int(input())
        print(k)

        # First preprocessing, tokenize slightly (only used for display).
        words = [
            w[:-1] + " " + w[-1] if w.endswith(punctuation) else w
            for w in wps[k].strip().split(" ")
        ]
        problem = " " + " ".join(words) + " "
        print(problem)

        story = read_parse(k)
        sets = makesets.makesets(story['sentences'])

        flags = e[k].strip()
        if flags != '':
            imap = [(item[-1], item) for item in flags.split(" ")]
            # NOTE(review): the original guarded this section with
            # `not all([x[0] for x in imap]) == '0'`, which is always true
            # (a bool never equals '0').  The section therefore always ran;
            # that behaviour is kept.  The intent was presumably "not every
            # flag is '0'" -- confirm before tightening.
            print(imap)
            for flag, item in imap:
                if flag != '0':
                    continue
                ent, num, v = item.split(',')
                if ent in ["$", "dollar"]:
                    continue
                sets = [s for s in sets if s[1].num != num]

        # Close the pickle file deterministically; the original left one
        # open handle behind per problem.
        with open('madesets/' + str(k) + '.pickle', 'wb') as fh:
            pickle.dump(sets, fh)
        EF.main(sets, k, a[k], sys.argv[1])

        sets = [
            s for s in sets
            if makesets.floatcheck(s[1].num) or s[1].num == 'x'
        ]
        print(sets)
        for s in sets:
            s[1].details()
def make_eq(q, a, VERBOSE, TRAIN, fold):
    """Collect the distinct verbs of all numeric sets and pickle them.

    Every problem is lower-cased, tokenized and parsed.  For each resulting
    set that carries a number (or the unknown ``'x'``) and has a truthy
    ``verbs`` attribute, the space-separated verbs are collected.  The
    de-duplicated list is pickled to ``data/predicates<fold>``.

    Args:
        q: Indexable sequence of problem strings.
        a: Not used by this function.
        VERBOSE: When truthy, each iteration lists all problems and reads
            the index to process from standard input.
        TRAIN: Not used by this function.
        fold: String appended to ``data/predicates`` to form the file name.
    """
    wps = q
    predicates = []
    punctuation = (',', '.', '?')
    for k in range(len(wps)):
        if VERBOSE:
            for idx, text in enumerate(wps):
                print(idx, text)
            k = int(input())
        print(k)

        # First preprocessing, tokenize slightly.
        words = [
            w[:-1] + " " + w[-1] if w.endswith(punctuation) else w
            for w in wps[k].lower().strip().split(" ")
        ]
        problem = " " + " ".join(words) + " "
        print(problem)

        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        sets = [
            s for s in sets
            if makesets.floatcheck(s[1].num) or s[1].num == 'x'
        ]
        print(sets)
        for s in sets:
            if s[1].verbs:
                predicates.extend(s[1].verbs.split(" "))

    predicates = list(set(predicates))
    # Close the output file deterministically (the original leaked it).
    with open("data/predicates" + fold, 'wb') as fh:
        pickle.dump(predicates, fh)
def make_eq(q, a, eqs, VERBOSE, TRAIN):
    """Turn gold equations into per-operator ("local") training examples.

    For every problem whose entry in ``eqs`` is not ``"None"`` the problem
    text is normalized and parsed, duplicate sets are pruned, and the
    equation is reduced triple by triple.  Each reduced
    ``(operator, left, right)`` triple is handed to ``training``; the
    examples it returns are accumulated per operator in ``bigtexamples``.
    When ``TRAIN`` is truthy the accumulated examples are pickled to
    ``'data/' + OUT + '.local.training'`` (``OUT`` is not defined in this
    function).

    Args:
        q: Indexable sequence of problem strings.
        a: Answers; only bound to ``answs`` and not read afterwards.
        eqs: Sequence of equation strings indexed in parallel with ``q``.
        VERBOSE: When truthy, each iteration lists all problems, reads the
            index to process from standard input, and pauses after showing
            the detected numbers.
        TRAIN: When truthy the collected examples are pickled at the end.
    """
    # operator -> (list, list); both lists are extended from training().
    bigtexamples = {x: ([], []) for x in ["+", "*", '/', '-', '=']}
    wps = q  #open(q).readlines()
    answs = a  #open(a).readlines()
    if not TRAIN and not VERBOSE:
        # NOTE(review): q is used as a sequence of problems (wps = q), so
        # q + ".out.txt" only works if q is a string -- looks like a
        # leftover from a file-based version.  `out` is never written here.
        out = open(q + ".out.txt", 'w')
    # Log of problems that had to be skipped; never closed in this function.
    problematic = open('somethingWrongProblems', 'w')
    # Spelled-out numbers -> digits (keys are space-delimited words).
    replacements = {
        ' two ': ' 2 ',
        " three ": ' 3 ',
        ' four ': ' 4 ',
        ' five ': ' 5 ',
        ' six ': ' 6 ',
        ' seven ': ' 7 ',
        ' eight ': ' 8 ',
        ' nine ': ' 9 ',
        ' ten ': ' 10 ',
        ' eleven ': ' 11 ',
        ' twice ': ' 2 '
    }
    for k in range(len(wps)):
        print(eqs[k])
        if eqs[k].strip() == "None":
            continue
        answers = [eqs[k]]
        if VERBOSE:
            # Interactive mode: the user picks which problem to process.
            for i in range(len(wps)):
                print(i, wps[i])
            k = int(input())
        print(k)
        #First preprocessing, tokenize slightly
        problem = wps[k].lower()
        problem = problem.strip().split(" ")
        for i, x in enumerate(problem):
            if len(x) == 0: continue
            # Detach trailing punctuation from its word.
            if x[-1] in [',', '.', '?']:
                problem[i] = x[:-1] + " " + x[-1]
        problem = ' '.join(problem)
        problem = " " + problem + " "
        print(problem)
        for r in replacements:
            problem = problem.replace(r, replacements[r])
        #make story
        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        i = 0
        print(sets)
        # Prune sets sharing the same non-None idx: keep the first one whose
        # num contains a digit, otherwise just the first duplicate.
        while i < len(sets):
            dups = [y for y in sets if y[1].idx != None]
            dups = [y for y in dups if y[1].idx == sets[i][1].idx]
            if len(dups) > 1:
                good = [
                    y for y in dups
                    if len([x for x in y[1].num if x.isdigit()]) > 0
                ]
                if good:
                    others = [x for x in dups if x != good[0]]
                    for x in others:
                        sets.remove(x)
                else:
                    # just pick 1
                    for x in dups[1:]:
                        sets.remove(x)
            i += 1
        # A problem without an unknown cannot be used.
        xidx = [i for i, x in enumerate(sets) if x[1].num == 'x']
        if not xidx:
            problematic.write('no x :' + problem)
            continue
        #TODO look for 2 xes
        xidx = xidx[0]
        # (cleaned number string, set) pairs; empty numbers are dropped.
        numlist = [(cleannum(v.num), v) for k, v in sets]
        numlist = [x for x in numlist if x[0] != '']
        if VERBOSE:
            for z, v in numlist:
                v.details()
            input()
        allnumbs = {str(k): v for k, v in numlist}
        # token -> (0, set); reduced sub-expressions are added below.
        objs = {k: (0, v) for k, v in numlist}
        for j, eq in enumerate(answers):
            trips = []
            print(j, eq)
            l, r = [x.strip().split(' ') for x in eq.split('=')]
            #compound = r if len(l)==1 else l
            #simplex = l if len(l)==1 else r
            target = 'x'
            target = (target, objs[target])
            #find innermost parens?
            sides = []
            for i, compound in enumerate([l, r]):
                # Collapse one (left, op, right) triple per pass until the
                # side is a single token.
                while len(compound) > 1:
                    if "(" in compound:
                        # rpidx: index of the last "("; lpidx: index of the
                        # first ")" after it.
                        rpidx = (len(compound) - 1) - compound[::-1].index('(')
                        lpidx = rpidx + compound[rpidx:].index(")")
                        subeq = compound[rpidx + 1:lpidx]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = compound[:rpidx] + [substr
                                                       ] + compound[lpidx + 1:]
                    else:
                        # No parentheses left: take the leftmost triple.
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    if True:
                        p, op, e = subeq
                        #print(p,op,e)
                        p = objs[p]
                        e = objs[e]
                        op = op.strip()
                        trips.append((op, p, e))
                        pute = (0, makesets.combine(p[1], e[1], op))
                        #print("OPERATION SELECTED: ",op)
                        #p.details()
                        #e.details()
                        #print(substr,pute[1].num)
                        # Register the combined set under its string form so
                        # later triples can refer to it.
                        objs[substr] = pute
                    # pute is always a tuple here, so this never triggers.
                    if pute == -1:
                        exit()
            t = training(trips, problem, story, target)
            for op in t:
                bigtexamples[op][0].extend(t[op][0])
                bigtexamples[op][1].extend(t[op][1])
                print(op, len(bigtexamples[op][0]))
    if TRAIN:
        # NOTE(review): the file object passed to pickle.dump is never closed.
        pickle.dump(bigtexamples, open('data/' + OUT + ".local.training",
                                       'wb'))
def make_eq(q, a, VERBOSE, TRAIN):
    """Derive candidate equations for each problem and build training data.

    Problems are read from file ``q`` and numeric answers from file ``a``.
    For each problem the numbers found in the parsed text are passed to
    ``Solver`` together with the known answer to obtain candidate equation
    strings.  The candidates are narrowed to "simple" ones and then by the
    position of ``x``.  Outside training the survivors are written to
    ``<q>.out.txt``.  When ``TRAIN`` is truthy one candidate is picked at
    random, reduced triple by triple, passed to ``training``, and the
    accumulated examples are pickled to ``'data/' + OUT + '.training'``
    (``OUT`` is not defined in this function).

    Args:
        q: Path of the file holding one problem per line.
        a: Path of the file holding one answer per line.
        VERBOSE: When truthy, each iteration lists all problems, reads the
            index to process from standard input and pauses at checkpoints.
        TRAIN: Selects training-example generation instead of writing the
            candidate equations to ``<q>.out.txt``.
    """
    # operator -> (list, list); both lists are extended from training().
    bigtexamples = {x: ([], []) for x in ["+", "*", '/', '-', '=']}
    # NOTE(review): none of the files opened below is closed explicitly.
    wps = open(q).readlines()
    answs = open(a).readlines()
    if not TRAIN and not VERBOSE:
        out = open(q + ".out.txt", 'w')
    # Log of problems that had to be skipped.
    problematic = open('somethingWrongProblems', 'w')
    # Spelled-out numbers -> digits (keys are space-delimited words).
    replacements = {
        ' two ': ' 2 ',
        " three ": ' 3 ',
        ' four ': ' 4 ',
        ' five ': ' 5 ',
        ' six ': ' 6 ',
        ' seven ': ' 7 ',
        ' eight ': ' 8 ',
        ' nine ': ' 9 ',
        ' ten ': ' 10 ',
        ' eleven ': ' 11 ',
        ' twice ': ' 2 '
    }
    for k in range(len(wps)):
        if VERBOSE:
            # Interactive mode: the user picks which problem to process.
            for i in range(len(wps)):
                print(i, wps[i])
            k = int(input())
        print(k)
        problem = wps[k].lower()
        #First preprocessing, tokenize slightly
        problem = problem.strip().split(" ")
        for i, x in enumerate(problem):
            if len(x) == 0: continue
            # Detach trailing punctuation from its word.
            if x[-1] in [',', '.', '?']:
                problem[i] = x[:-1] + " " + x[-1]
        problem = ' '.join(problem)
        problem = " " + problem + " "
        print(problem)
        for r in replacements:
            problem = problem.replace(r, replacements[r])
        # Disabled variant (kept as a no-op string statement).
        ''' if " how " in problem: left,right = problem.split(" how ") else: left = problem for r in replacements: left = left.replace(r,replacements[r]) if " how " in problem: problem = left + ' how ' + right else: problem = left '''
        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        i = 0
        print(sets)
        # Prune sets sharing the same non-None idx: keep the first one whose
        # num contains a digit, otherwise just the first duplicate.
        while i < len(sets):
            dups = [y for y in sets if y[1].idx != None]
            dups = [y for y in dups if y[1].idx == sets[i][1].idx]
            if len(dups) > 1:
                good = [
                    y for y in dups
                    if len([x for x in y[1].num if x.isdigit()]) > 0
                ]
                if good:
                    others = [x for x in dups if x != good[0]]
                    for x in others:
                        sets.remove(x)
                else:
                    # just pick 1
                    for x in dups[1:]:
                        sets.remove(x)
            i += 1
        # A problem without an unknown cannot be used.
        xidx = [i for i, x in enumerate(sets) if x[1].num == 'x']
        if not xidx:
            problematic.write('no x :' + problem)
            continue
        #TODO look for 2 xes
        xidx = xidx[0]
        # twoToRight is set when the set before x, after x, or (when x is
        # among the last two sets) x itself has entity 'dozen'.
        # NOTE(review): the nesting of these three checks was reconstructed
        # from whitespace-collapsed source -- confirm.
        twoToRight = False
        if xidx > 0:
            print(len(sets), xidx)
            if sets[xidx - 1][1].entity == 'dozen':
                # 2 vals to right
                twoToRight = True
        if len(sets) - xidx > 1:
            if sets[xidx + 1][1].entity == 'dozen':
                twoToRight = True
        if len(sets) - xidx < 3:
            if sets[xidx][1].entity == 'dozen':
                twoToRight = True
        # (cleaned number string, set) pairs; empty numbers are dropped.
        numlist = [(cleannum(v.num), v) for k, v in sets]
        numlist = [x for x in numlist if x[0] != '']
        if VERBOSE:
            for z, v in numlist:
                v.details()
            input()
        allnumbs = {str(k): v for k, v in numlist}
        # token -> (0, set); reduced sub-expressions are added below.
        objs = {k: (0, v) for k, v in numlist}
        print('start solving')
        print(numlist)
        if len(numlist) < 2:
            problematic.write("not enough numbers : " + problem)
            continue
        # Known numbers only; the Solver is given the gold answer as float.
        values = [x[0] for x in numlist if x[0] != 'x']
        print(values)
        ST = Solver(values)
        answers = []
        answers = ST.solveEquations(float(answs[k]))
        print(answs[k])
        if not answers:
            problematic.write("No answers : " + problem + "\n")
            problematic.write(str([x[0] for x in numlist]) + '\n')
            problematic.write(answs[k] + '\n')
            continue
        print('done solving')
        # if target has 2 entities, try eqs with = x op y format
        simpleranswers = None
        if twoToRight:
            try:
                simpleranswers = [
                    x for x in answers if x.split(" ")[-4] == "=" and (
                        x.split(" ")[-3] == 'x' or x.split(' ')[-1] == 'x')
                ]
            # NOTE(review): bare except also hides errors other than the
            # IndexError raised by equations shorter than four tokens.
            except:
                pass
        if not simpleranswers:
            # Fallback: "=" is the second or the second-to-last token.
            simpleranswers = [
                x for x in answers
                if x.split(" ")[1] == '=' or x.split(" ")[-2] == "="
            ]
        #simpleranswers = [x for x in answers if x.split(" ")[1] == '=' or x.split(" ")[-2]=="="]
        #filter out where = in middle if simpler eq exists
        if simpleranswers:
            print(answers)
            answers = simpleranswers[:]
        else:
            problematic.write("not simple : " + problem + "\n")
            continue
        values = [x[0] for x in numlist]
        xidx = values.index('x')
        print(simpleranswers)
        print(xidx)
        # Drop candidates in which x sits at a different operand position
        # than in the problem's number list.
        # NOTE(review): the loop variable shadows parameter `a` (the answer
        # file path, already consumed above).
        for a in simpleranswers:
            aspl = [
                x for x in a.split(" ")
                if x not in ["/", "-", '+', '*', '=', '(', ')']
            ]
            print(a)
            print(aspl)
            print(values)
            aidx = aspl.index('x')
            print(aidx)
            if aidx != xidx:
                print("removing ", a)
                answers.remove(a)
        print(answers)
        # If the position filter removed everything, fall back.
        if answers == []:
            answers = simpleranswers
        print(answers)
        if not VERBOSE:
            if not TRAIN:
                # One record per problem, terminated by "___".
                out.write(problem + '\n')
                out.write(answs[k] + "\n")
                out.write(str([x[0] for x in numlist]))
                out.write("\n")
                for x in answers:
                    out.write(x + "\n")
                out.write("___\n")
        if VERBOSE: input()
        if not TRAIN: continue
        # Prefer equations whose second-to-last token is "=", then pick one
        # candidate at random.
        if len([x for x in answers if x.split(" ")[-2] == "="]) > 0:
            answers = [x for x in answers if x.split(" ")[-2] == "="]
        c = randint(0, len(answers) - 1)
        answers = [answers[c]]
        for j, eq in enumerate(answers):
            trips = []
            print(j, eq)
            l, r = [x.strip().split(' ') for x in eq.split('=')]
            # compound: the multi-token side; simplex: the single-token side.
            compound = r if len(l) == 1 else l
            simplex = l if len(l) == 1 else r
            target = simplex[0]
            target = (target, objs[target])
            #find innermost parens?
            # Collapse one (left, op, right) triple per pass until the
            # compound side is a single token.
            while len(compound) > 1:
                if "(" in compound:
                    # rpidx: index of the last "("; lpidx: index of the
                    # first ")" after it.
                    rpidx = (len(compound) - 1) - compound[::-1].index('(')
                    lpidx = rpidx + compound[rpidx:].index(")")
                    subeq = compound[rpidx + 1:lpidx]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = compound[:rpidx] + [substr
                                                   ] + compound[lpidx + 1:]
                else:
                    # No parentheses left: take the leftmost triple.
                    subeq = compound[0:3]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = [substr] + compound[3:]
                if True:
                    p, op, e = subeq
                    #print(p,op,e)
                    p = objs[p]
                    e = objs[e]
                    op = op.strip()
                    trips.append((op, p, e))
                    pute = (0, makesets.combine(p[1], e[1], op))
                    #print("OPERATION SELECTED: ",op)
                    #p.details()
                    #e.details()
                    #print(substr,pute[1].num)
                    objs[substr] = pute
                # pute is always a tuple here, so this never triggers.
                if pute == -1:
                    exit()
            # Final "=" triple, ordered as the sides appear in the equation.
            if simplex == l:
                trips.append(("=", objs[simplex[0]], objs[compound[0]]))
            else:
                trips.append(("=", objs[compound[0]], objs[simplex[0]]))
            t = training(trips, problem, target)
            for op in t:
                bigtexamples[op][0].extend(t[op][0])
                bigtexamples[op][1].extend(t[op][1])
                print(op, len(bigtexamples[op][0]))
    if TRAIN:
        pickle.dump(bigtexamples, open('data/' + OUT + ".training", 'wb'))
def infer(q,a,VERBOSE):
    """Guess an equation for every problem and report accuracy counters.

    Problems are read from file ``q`` and numeric answers from file ``a``.
    For each problem all equations produced by ``StringTemplate`` over the
    detected numbers are scored with ``compute``; the equations are then
    tried best-first until one yields an acceptable solution, which is
    compared with the gold answer.  Counters are printed at the end;
    nothing is returned.

    Args:
        q: Path of the file holding one problem per line.
        a: Path of the file holding one answer per line.
        VERBOSE: When truthy, each iteration lists all problems, reads the
            index to process from standard input and pauses at checkpoints.
    """
    # NOTE(review): none of the files opened below is closed explicitly.
    wps = open(q).readlines()
    answs = open(a).readlines()
    problematic = open('somethingWrongProblems','a')
    # Per-operator [in correct guesses, in incorrect guesses] counts for
    # '+', '-', '*' and '/'.
    ar = [0,0]
    sr = [0,0]
    mr = [0,0]
    dr = [0,0]
    replacements = {' two ':' 2 '," three ":' 3 ',' four ':' 4 ',' five ':' 5 ',' six ':' 6 ',' seven ':' 7 ',' eight ':' 8 ',' nine ':' 9 ',' ten ':' 10 ',' eleven ':' 11 ', ' twice ':' 2 '}
    right = 0
    guesses = 0
    ad = []
    wrong = []
    multiops = 0
    multiopsright = 0
    # Second, identical assignment of the same table.
    replacements = {' two ':' 2 '," three ":' 3 ',' four ':' 4 ',' five ':' 5 ',' six ':' 6 ',' seven ':' 7 ',' eight ':' 8 ',' nine ':' 9 ',' ten ':' 10 ',' eleven ':' 11 ', ' twice ':' 2 '}
    for k in range(len(wps)):
        if VERBOSE:
            # Interactive mode: the user picks which problem to process.
            for i in range(len(wps)):
                print(i,wps[i])
            k = int(input())
        print(k)
        problem = wps[k].lower()
        #First preprocessing, tokenize slightly
        problem = problem.strip().split(" ")
        for i,x in enumerate(problem):
            if len(x)==0:continue
            # Detach trailing punctuation from its word.
            if x[-1] in [',','.','?']:
                problem[i] = x[:-1]+" "+x[-1]
        problem = ' '.join(problem)
        problem = " " + problem + " "
        print(problem)
        for r in replacements:
            problem = problem.replace(r,replacements[r])
        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        #REMOVE DUPS THIS IS BAD:
        # Keeps only the first set for every distinct num.
        i = 0
        while i < len(sets):
            x = sets[i]
            dups = [y for y in sets if y[1].num == x[1].num]
            if len(dups)>1:
                for x in dups[1:]:
                    sets.remove(x)
            i+=1
        print("Sets detected: ")
        for x in sets:
            x[1].details()
        # (cleaned number string, set) pairs; empty numbers are dropped.
        numlist = [(cleannum(v.num),v) for k,v in sets]
        numlist = [x for x in numlist if x[0]!='']
        if VERBOSE:
            for z,v in numlist:
                v.details()
            input()
        allnumbs = {str(k):v for k,v in numlist}
        objs = {k:(0,v) for k,v in numlist}
        # Numbers written with a leading/trailing "*" or trailing "/" carry
        # an operator hint: record it as a substring an equation must
        # contain, then strip the marker from the number.
        constraints = []
        for i in range(len(numlist)):
            if numlist[i][0][-1] == "*":
                if i==0:continue
                constraints.append(numlist[i-1][0]+" * "+numlist[i][0][:-1])
                numlist[i] = (''.join([x for x in numlist[i][0] if x not in ['*','/']]),numlist[i][1])
                numlist[i][1].num = numlist[i][0]
            elif numlist[i][0][0] == "*":
                if i==0:continue
                numlist[i] = (''.join([x for x in numlist[i][0] if x not in ['*','/']]),numlist[i][1])
                # Swap with the previous entry.
                tmp = numlist[i-1]
                numlist[i-1]=numlist[i]
                numlist[i]=tmp
                constraints.append(numlist[i-1][0]+" * "+numlist[i][0][1:])
            elif numlist[i][0][-1] == "/":
                if i==0:continue
                constraints.append(" / "+numlist[i][0][:-1])
                numlist[i] = (''.join([x for x in numlist[i][0] if x not in ['*','/']]),numlist[i][1])
        # Rebuild the lookup with the cleaned-up number strings.
        objs = {k:(0,v) for k,v in numlist}
        # Problems with fewer than two distinct numbers or without an
        # unknown are counted as wrong.
        if len(objs)<2:
            wrong.append(k)
            continue
        if 'x' not in objs:
            wrong.append(k)
            continue
        integerproblem = all([float(x[0]).is_integer() for x in numlist if x[0]!='x'])
        # More than three tokens: tracked separately as multi-operation.
        multi = False
        if len(objs)>3:
            multiops+=1
            multi = True
        if VERBOSE:
            print(objs,numlist,[v.num for k,v in sets])
        #print(allnumbs)
        state = []
        numidxlist = [x[0] for x in numlist]
        ST = StringTemplate(numidxlist, inf=True)
        # One score per entry of ST.equations, in the same order.
        scores = []
        for j,eq in enumerate(ST.equations):
            #print(j,eq.toString())
            # An equation is acceptable when there are no constraints or it
            # contains at least one of them as a substring.
            good = False
            if len(constraints)==0:
                good = True
            else:
                for constraint in constraints:
                    if constraint in eq.toString():
                        good = True
            if not good:
                scores.append(-0.2)
                continue
            thisscore = []
            #determine score for this eq
            l,r = [x.strip().split(' ') for x in eq.toString().split('=')]
            # Equations with operations on both sides get the penalty score.
            if len(r)>1 and len(l)>1:
                scores.append(-0.2);continue
            if len(r)>1:
                compound = r
                target = l[0]
            else:
                compound = l
                target = r[0]
            target = (target,objs[target])
            #find innermost parens?
            # Collapse one (left, op, right) triple per pass until the
            # compound side is a single token.
            while len(compound)>1:
                if "(" in compound:
                    # rpidx: index of the last "("; lpidx: index of the
                    # first ")" after it.
                    rpidx = (len(compound) - 1) - compound[::-1].index('(')
                    lpidx = rpidx+compound[rpidx:].index(")")
                    subeq = compound[rpidx+1:lpidx]
                    substr = "("+''.join(subeq)+")"
                    compound = compound[:rpidx]+[substr]+compound[lpidx+1:]
                else:
                    subeq = compound[0:3]
                    substr = "("+''.join(subeq)+")"
                    compound = [substr]+compound[3:]
                # Results are cached in objs under the sub-expression string.
                if substr in objs:
                    pute = objs[substr]
                    #print(pute[0],pute[1].num)
                else:
                    p,op,e = subeq
                    #print(p,op,e)
                    p = objs[p][1]
                    e = objs[e][1]
                    op = op.strip()
                    pute = compute(p,op,e,target,problem)
                    #print("OPERATION SELECTED: ",op)
                    #p.details()
                    #e.details()
                    #print(substr,pute[1].num)
                    objs[substr]=pute
                if pute == -1:
                    exit()
                score,c = pute
                thisscore.append(score)
            # Penalize equations whose result entity differs from the
            # target's; c is the result of the last reduction.
            if target[1][1].entity != c.entity:
                thisscore.append(-0.2)
            scores.append(sum(thisscore))
        m = np.argmax(scores)
        srt = sorted([(x,i) for i,x in enumerate(scores)],reverse=True)
        print('\n Top scoring 3 equations: ')
        for x,i in srt[:3]:
            print(x,ST.equations[i].toString())
        # Disabled variant (kept as a no-op string statement).
        ''' try: if target.ent=='dozen': guess = solve('('+numlist[0].num+'/12)'+"-"+target.num,'x')[0] print(numlist[0].num+"/12="+target.num) else: guess = solve(numlist[0].num+"-"+target.num,'x')[0] print(numlist[0].num+"="+target.num) '''
        # Equation indices ordered by descending score.
        eqidxs = [y[0] for y in sorted(enumerate(scores),key=lambda x:x[1],reverse=True)]
        eqnidsx = [x[1] for x in srt]
        seen = []
        tright = 0
        for i in eqidxs:
            # Stop after the first accepted guess.
            if len(seen)>=1:break
            eq = ST.equations[i].toString()
            #eq = eq.replace("=",'-')
            # Rewrite "lhs = rhs" as "lhs - (rhs)" for the solver.
            splitEquation = eq.split('=')
            eq = splitEquation[0] + '- (' + splitEquation[1] + ')'
            #print(scores[i], eq)
            try:
                guess = solve(eq,'x')[0]
            # NOTE(review): bare except -- any solver failure becomes -1 and
            # is then rejected by the non-negativity check.
            except:
                guess = -1
            # This is the non-negative constraint
            # wrapped in a "check for complex number" try statement :/
            try:
                if guess < 0:
                    continue
            except:
                continue
            #this is a constraint against fractional answers when the problem is integers
            # NOTE(review): is_integer is read as an attribute, not called;
            # this is only meaningful if solve() returns objects exposing it
            # as a property -- confirm.
            if not guess.is_integer:
                if integerproblem:
                    continue
            if guess not in seen:
                seen.append(guess)
            else:
                continue
            answ = float(answs[k])
            # Operator characters of the guessed equation, for the tallies.
            ops = [x for x in ST.equations[i].toString() if x in ['+','-','*','/']]
            if guess == answ:
                print("\nCORRECT")
                tright=1
                ar[0] += ops.count('+')
                sr[0] += ops.count('-')
                mr[0] += ops.count('*')
                dr[0] += ops.count('/')
            else:
                print("\nINCORRECT")
                ar[1] += ops.count('+')
                sr[1] += ops.count('-')
                mr[1] += ops.count('*')
                dr[1] += ops.count('/')
            print("Guessed Equation : ",ST.equations[i].toString() )
            print("Guess : ",guess,"\nTrue Answer :", answ, '\n\n')
        guesses += len(seen)
        if tright==1:
            if multi:
                multiopsright += 1
            right +=1
        else:
            wrong.append(k)
        #break
        if VERBOSE: input()
        continue
    print(right,guesses)
    print(multiops,multiopsright)
    print(ar,sr,mr,dr)
def make_eq(q, a, equations):
    """Build per-operator training examples and pickle them.

    For each problem the equations returned by ``get_k_eqs`` that are
    flagged ``1`` and end in ``= x`` are kept.  Every kept equation is
    reduced triple by triple; the ``(operator, left, right)`` triples are
    handed to ``training`` and the returned examples are accumulated per
    operator.  The result is pickled to ``data/ixl.local.training``.

    Problems are skipped when no suitable equation exists, when the parse
    contains no unknown ``x``, or when the first equation mentions a token
    that was not found among the parsed numbers.

    Args:
        q: Indexable sequence of problem strings.
        a: Not used by this function.
        equations: Sequence indexed in parallel with ``q``; each item is
            passed to ``get_k_eqs``.
    """
    symbols = ['(', ')', '+', '-', '/', '*', '=']
    punctuation = (',', '.', '?')
    # operator -> (list, list); both lists are extended from training().
    bigtexamples = {x: ([], []) for x in ["+", "*", '/', '-', '=']}
    wps = q
    for k in range(len(wps)):
        eqs = get_k_eqs(equations[k])
        answers = [x[1] for x in eqs if x[0] == 1]
        answers = [x for x in answers if x.split()[-2] == '=']
        answers = [x for x in answers if x.split()[-1] == 'x']
        if not answers:
            continue
        answers = list(set(answers))

        # First preprocessing, tokenize slightly.
        words = [
            w[:-1] + " " + w[-1] if w.endswith(punctuation) else w
            for w in wps[k].strip().split(" ")
        ]
        problem = " " + " ".join(words) + " "
        print(k)
        print(problem)

        # make story
        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        if not any(s[1].num == 'x' for s in sets):
            print("NO X WHY")
            continue

        # (cleaned number string, set) pairs; empty numbers are dropped.
        numlist = [(cleannum(v.num), v) for _, v in sets]
        numlist = [pair for pair in numlist if pair[0] != '']
        # token -> (0, set); reduced sub-expressions are added below.
        objs = {num: (0, v) for num, v in numlist}
        print(objs.items())

        # Every operand of the first equation must be a known token.
        consts = [tok for tok in answers[0].split(" ") if tok not in symbols]
        present = [tok for tok in consts if tok in objs]
        if present != consts:
            print(present, consts)
            print("missing thing")
            continue

        for j, eq in enumerate(answers):
            trips = []
            print(j, eq)
            l, r = [side.strip().split(' ') for side in eq.split('=')]
            target = ('x', objs['x'])
            for compound in (l, r):
                # Collapse one (left, op, right) triple per pass until the
                # side is a single token.
                while len(compound) > 1:
                    if "(" in compound:
                        # Innermost group: last "(" and first ")" after it.
                        rpidx = (len(compound) - 1) - compound[::-1].index('(')
                        lpidx = rpidx + compound[rpidx:].index(")")
                        subeq = compound[rpidx + 1:lpidx]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = (compound[:rpidx] + [substr]
                                    + compound[lpidx + 1:])
                    else:
                        # No parentheses left: take the leftmost triple.
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    p, op, e = subeq
                    p = objs[p]
                    e = objs[e]
                    op = op.strip()
                    trips.append((op, p, e))
                    # Register the combined set so later triples can use it.
                    objs[substr] = (0, makesets.combine(p[1], e[1], op))
            t = training(trips, problem, story, target, sets)
            for op in t:
                bigtexamples[op][0].extend(t[op][0])
                bigtexamples[op][1].extend(t[op][1])

    # Close the output file deterministically (the original leaked it).
    with open('data/ixl.local.training', 'wb') as fh:
        pickle.dump(bigtexamples, fh)
def make_eq(q, a, equations):
    """Score candidate equations per problem and count correct top picks.

    For each problem the candidates from ``get_k_eqs`` are de-duplicated
    and scored: every reduced triple is scored by ``compute``, the triple
    scores are multiplied together and finally multiplied by the score of
    the top-level ``=`` comparison.  A problem counts as right when the
    best-scoring candidate is labelled ``1``.

    Args:
        q: Indexable sequence of problem strings.
        a: Answers; only bound to ``answs`` and not read afterwards.
        equations: Sequence indexed in parallel with ``q``; each item is
            passed to ``get_k_eqs``.

    Returns:
        Tuple ``(right, wrong)`` of problem counts.  Problems skipped
        before scoring are counted in neither.
    """
    wps = q  #open(q).readlines()
    answs = a  #open(a).readlines()
    right = 0
    wrong = 0
    for k in range(len(wps)):
        answers = get_k_eqs(equations[k], g=True, a=True)
        if answers == []:
            continue
        # Keep the first candidate for every distinct equation string
        # (element 1 of each candidate).
        seeneq = []
        seen = []
        for x in answers:
            if x[1] not in seeneq:
                seen.append(x)
                seeneq.append(x[1])
        answers = seen
        answers = list(set(answers))
        #First preprocessing, tokenize slightly
        problem = wps[k]  #.lower()
        problem = problem.strip().split(" ")
        for i, x in enumerate(problem):
            if len(x) == 0: continue
            # Detach trailing punctuation from its word.
            if x[-1] in [',', '.', '?']:
                problem[i] = x[:-1] + " " + x[-1]
        problem = ' '.join(problem)
        problem = " " + problem + " "
        print(problem)
        #make story
        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        i = 0
        ######
        for x in sets:
            x[1].details()
        #continue
        # A problem without an unknown cannot be scored.
        xidx = [i for i, x in enumerate(sets) if x[1].num == 'x']
        if not xidx:
            print("NO X WHY")
            continue
        #TODO look for 2 xes
        xidx = xidx[0]
        # (cleaned number string, set) pairs; empty numbers are dropped.
        numlist = [(cleannum(v.num), v) for k, v in sets]
        numlist = [x for x in numlist if x[0] != '']
        allnumbs = {str(k): v for k, v in numlist}
        # token -> (0, set); reduced sub-expressions are added below.
        objs = {k: (0, v) for k, v in numlist}
        print(objs.items())
        # Operands of the first candidate and the parsed tokens must match
        # in both directions, otherwise the problem is skipped.
        consts = [
            x for x in answers[0][1].split(" ") if x not in [
                '(',
                ')',
                '+',
                '-',
                '/',
                '*',
                '=',
            ]
        ]
        present = [x for x in consts if x in objs]
        if consts != present:
            print(present, consts)
            print("missing thing")
            continue
        if len([x for x in objs if x not in consts]) > 0:
            print("missing thing")
            continue
        scores = []
        # Each candidate unpacks as (label, equation, cons, guess).
        for j, eq, cons, guess in answers:
            eqspl = eq.split(" = ")
            consts = [
                x for x in eq.split(" ") if x not in [
                    '(',
                    ')',
                    '+',
                    '-',
                    '/',
                    '*',
                    '=',
                ]
            ]
            # 1 when the operands appear in the same order as in the text.
            order = int(consts == [x[0] for x in numlist])
            # Disabled variant (kept as a no-op string statement).
            ''' if order == 0: if eqspl[0].strip() == 'x' or eqspl[1].strip()=='x': eq2 = eqspl[1] + " = " + eqspl[0] consts = [x for x in eq2.split(" ") if x not in ['(',')','+','-','/','*','=',]] order = int(consts==[x[0] for x in numlist]) if order == 0: continue '''
            # Only candidates in text order are scored.
            if order == 0: continue
            #j = randint(0,len(answers)-1)
            #eq = answers[j]
            trips = []
            #print(j,eq)
            l, r = [x.strip().split(' ') for x in eq.split('=')]
            consts = " ".join([
                x for x in answers[0][1].split(" ") if x not in [
                    '(',
                    ')',
                    '+',
                    '-',
                    '/',
                    '*',
                ]
            ])
            consts = consts.split(" = ")
            # The two operands adjacent to "=" in the first candidate.
            sp = (objs[consts[0].split(" ")[-1]],
                  objs[consts[1].split(" ")[0]])
            target = 'x'
            target = (target, objs[target])
            #find innermost parens?
            sides = []
            thisscore = []
            for i, compound in enumerate([l, r]):
                # Collapse one (left, op, right) triple per pass until the
                # side is a single token.
                while len(compound) > 1:
                    if "(" in compound:
                        # rpidx: index of the last "("; lpidx: index of the
                        # first ")" after it.
                        rpidx = (len(compound) - 1) - compound[::-1].index('(')
                        lpidx = rpidx + compound[rpidx:].index(")")
                        subeq = compound[rpidx + 1:lpidx]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = compound[:rpidx] + [substr
                                                       ] + compound[lpidx + 1:]
                    else:
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    p, op, e = subeq
                    p = objs[p]
                    e = objs[e]
                    op = op.strip()
                    pute = compute(p, op, e, target, problem, story, order)
                    objs[substr] = pute
                    if pute == -1:
                        exit()
                    score, c, vals = pute
                    thisscore.append(score)
                sides.append(objs[compound[0]])
            p = sides[0]
            e = sides[1]
            # Product of the triple scores times the "=" score.
            score = 1
            for s in thisscore:
                score *= s
            score *= compute(p, '=', e, target, problem, story, order, score,
                             cons)[0]
            scores.append((score, j, eq, guess))
        scores = sorted(scores, reverse=True)
        # Candidates labelled 1 among the three best.
        righties = [x for x in scores[:3] if x[1] == 1]
        if not righties:
            wrong += 1
            print(scores[:3])
            print("TOP SCORING NO CORRECT SOLUTION \nINCORRECT")
            continue
        else:
            print(scores[:3])
            corr = righties[0][3]
        # Disabled variant (kept as a no-op string statement).
        ''' guessd = {} for x in scores[:3]: if x[3] not in guessd: guessd[x[3]]=x[0] else: guessd[x[3]]+=x[0] guessd = sorted(guessd.items(),key=lambda x: x[1],reverse=True) if guessd[0][0]==corr: right+=1 print("CORRECT") else: wrong += 1 print("INCORRECT") '''
        # Right only when the single best candidate is labelled 1.
        if len(scores) > 0:
            if scores[0][1] == 1:
                right += 1
                print("CORRECT")
            else:
                wrong += 1
                print("INCORRECT")
        else:
            wrong += 1
            print("INCORRECT")
    return (right, wrong)
def score(problem):
    """Score every template equation that can be built for ``problem``.

    The text is tokenized, number words are replaced using the
    module-level ``replacements`` table, and the text is parsed into sets.
    Equations come from ``StringTemplate``: over all numbers when there
    are at most three, otherwise over every subset that leaves out one
    known number.  Each equation is reduced triple by triple, every triple
    being scored by ``compute``.

    Reads the module-level names ``replacements`` and ``VERBOSE``.

    Args:
        problem: Problem text.

    Returns:
        ``-1`` when the parse has no unknown ``x`` or fewer than two
        distinct numbers.  Otherwise the tuple ``(tripeqs, scores,
        equalsmatch, contmatch, integerproblem, failurerate, fivescores)``;
        the per-equation lists are appended to once per equation.
    """
    problem = problem.strip().split(" ")
    for i, x in enumerate(problem):
        if len(x) == 0: continue
        # Detach trailing punctuation from its word.
        if x[-1] in [',', '.', '?']:
            problem[i] = x[:-1] + " " + x[-1]
    problem = ' '.join(problem)
    problem = " " + problem + " "
    print(problem)
    for r in replacements:
        problem = problem.replace(r, replacements[r])
    story = nlp.parse(problem)
    sets = makesets.makesets(story['sentences'])
    xidx = [i for i, x in enumerate(sets) if x[1].num == 'x']
    # twoToRight is set when the set before x, after x, or (when x is among
    # the last two sets) x itself has entity 'dozen'.
    # NOTE(review): the nesting of the three checks below was reconstructed
    # from whitespace-collapsed source -- confirm.
    twoToRight = False
    if not xidx:
        print("AAAH~! NO X!")
    else:
        xidx = xidx[0]
        if xidx > 0:
            print(len(sets), xidx)
            if sets[xidx - 1][1].entity == 'dozen':
                # 2 vals to right
                twoToRight = True
        if len(sets) - xidx > 1:
            if sets[xidx + 1][1].entity == 'dozen':
                twoToRight = True
        if len(sets) - xidx < 3:
            if sets[xidx][1].entity == 'dozen':
                twoToRight = True
    print("Sets detected: ")
    for x in sets:
        x[1].details()
    # (cleaned number string, set) pairs; empty numbers are dropped.
    numlist = [(cleannum(v.num), v) for k, v in sets]
    numlist = [x for x in numlist if x[0] != '']
    if VERBOSE:
        for z, v in numlist:
            v.details()
        input()
    allnumbs = {str(k): v for k, v in numlist}
    objs = {k: (0, v) for k, v in numlist}
    # Numbers written with a leading/trailing "*" or trailing "/" carry an
    # operator hint; the hint is recorded and the marker stripped.  The
    # constraints are collected but only referenced in the disabled block
    # further down.
    constraints = []
    for i in range(len(numlist)):
        if numlist[i][0][-1] == "*":
            if i == 0: continue
            constraints.append(numlist[i - 1][0] + " * " + numlist[i][0][:-1])
            numlist[i] = (''.join([
                x for x in numlist[i][0] if x not in ['*', '/']
            ]), numlist[i][1])
            numlist[i][1].num = numlist[i][0]
        elif numlist[i][0][0] == "*":
            if i == 0: continue
            numlist[i] = (''.join([
                x for x in numlist[i][0] if x not in ['*', '/']
            ]), numlist[i][1])
            # Swap with the previous entry.
            tmp = numlist[i - 1]
            numlist[i - 1] = numlist[i]
            numlist[i] = tmp
            constraints.append(numlist[i - 1][0] + " * " + numlist[i][0][1:])
        elif numlist[i][0][-1] == "/":
            if i == 0: continue
            constraints.append(" / " + numlist[i][0][:-1])
            numlist[i] = (''.join([
                x for x in numlist[i][0] if x not in ['*', '/']
            ]), numlist[i][1])
    # Rebuild the lookup with the cleaned-up number strings.
    objs = {k: (0, v) for k, v in numlist}
    if 'x' not in objs:
        return -1
    if len(objs) < 2:
        return -1
    integerproblem = all(
        [float(x[0]).is_integer() for x in numlist if x[0] != 'x'])
    numidxlist = [x[0] for x in numlist]
    tripeqs = []
    if len(numidxlist) <= 3:
        ST = StringTemplate(numidxlist, inf=True)
        tripeqs = ST.equations
    else:
        # More than three tokens: build templates over every subset that
        # leaves out one known number.
        for x in numidxlist:
            if x != 'x':
                ST = StringTemplate([y for y in numidxlist if y != x],
                                    inf=True)
                tripeqs.extend(ST.equations)
    # Per-equation result lists, appended to in step with tripeqs.
    scores = []
    equalsmatch = []
    contmatch = []
    failurerate = []
    fivescores = []
    for j, eq in enumerate(tripeqs):
        #print(j,eq.toString())
        good = False
        # Disabled constraint filter (kept as a no-op string statement).
        ''' if len(constraints)==0: good = True else: for constraint in constraints: if constraint in eq.toString(): good = True if not good: scores.append(-0.2) continue '''
        thisscore = []
        # Holds the `vals` of up to five reductions.
        thisfivescore = [[0] * 4, [0] * 4, [0] * 4, [0] * 4, [0] * 4]
        fivei = 0
        #print(eq.toString())
        #determine score for this eq
        l, r = [x.strip().split(' ') for x in eq.toString().split('=')]
        #print(l,r)
        if twoToRight:
            # Only equations with a three-token side are considered;
            # others get placeholder entries.
            if len(r) != 3 and len(l) != 3:
                scores.append(-0.2)
                equalsmatch.append('x')
                contmatch.append('x')
                failurerate.append('x')
                fivescores.append(thisfivescore)
                continue
            if len(r) == 3:
                compound = r
                target = 'x'
            else:
                compound = l
                target = 'x'
        else:
            # Equations with operations on both sides get placeholders.
            if len(r) > 1 and len(l) > 1:
                scores.append(-0.2)
                equalsmatch.append('x')
                contmatch.append('x')
                failurerate.append('x')
                fivescores.append(thisfivescore)
                continue
            ''' if len(r)>1: compound = r target = l[0] else: compound = l target = r[0] '''
            target = 'x'
        target = (target, objs[target])
        #find innermost parens?
        sides = []
        for compound in [l, r]:
            c = None
            # Collapse one (left, op, right) triple per pass until the side
            # is a single token.
            while len(compound) > 1:
                if "(" in compound:
                    # rpidx: index of the last "("; lpidx: index of the
                    # first ")" after it.
                    rpidx = (len(compound) - 1) - compound[::-1].index('(')
                    lpidx = rpidx + compound[rpidx:].index(")")
                    subeq = compound[rpidx + 1:lpidx]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = compound[:rpidx] + [substr
                                                   ] + compound[lpidx + 1:]
                else:
                    subeq = compound[0:3]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = [substr] + compound[3:]
                # Results are cached in objs under the sub-expression string.
                if substr in objs:
                    pute = objs[substr]
                    #print(pute[0],pute[1].num)
                else:
                    p, op, e = subeq
                    #print(p,op,e)
                    p = objs[p][1]
                    e = objs[e][1]
                    op = op.strip()
                    pute = compute(p, op, e, target, problem, story)
                    #print("OPERATION SELECTED: ",op)
                    #p.details()
                    #e.details()
                    #print(substr,pute[1].num)
                    objs[substr] = pute
                if pute == -1:
                    exit()
                score, c, vals = pute
                thisscore.append(score)
                if fivei < 5:
                    thisfivescore[fivei] = vals
                    fivei += 1
            # A single-token side: use its stored (score, set) entry.
            if c == None:
                score, c = objs[compound[0]]
                thisscore.append(score)
            sides.append(c)
        # NOTE(review): whether this second append belongs here (once per
        # equation) or inside the loop over sides could not be determined
        # from the whitespace-collapsed source -- confirm.
        thisscore.append(score)
        fivescores.append(thisfivescore)
        # 1 when both sides resolve to the same entity.
        if sides[0].entity == sides[1].entity:
            #thisscore.append(-0.2)
            equalsmatch.append(1)
        else:
            equalsmatch.append(0)
        failurerate.append(sum([objs[x][1].type_failure for x in objs]))
        # Note: 1 is appended when the containers DIFFER; c is the set of
        # the last processed side.
        if target[1][1].container != c.container:
            contmatch.append(1)
        else:
            contmatch.append(0)
        # Equation score is the mean of the collected scores.
        if len(thisscore) == 0:
            scores.append(0)
        else:
            scores.append(sum(thisscore) / float(len(thisscore)))
    return (tripeqs, scores, equalsmatch, contmatch, integerproblem,
            failurerate, fivescores)
def make_eq(q, a, equations):
    """Build global-model training vectors and write them to disk.

    For each problem in ``q`` the candidate equations returned by
    ``get_k_eqs(equations[k])`` are reduced side by side: every innermost
    ``operand op operand`` group is merged with ``makesets.combine`` until
    each side of the "=" is a single object. ``training(...)`` turns the two
    reduced sides into one vector, and all vectors are written to
    ``data/<last character of sys.argv[1]>.global.data``, one per line, as
    ``<v[0]> 1:<v[1]> 2:<v[2]> ...``.

    A problem is skipped when it has no candidate equations, when no parsed
    set has ``num == 'x'``, or when the first candidate mentions a constant
    that has no parsed set.

    Args:
        q: Sequence of problem texts.
        a: Unused here; kept so existing callers keep working.
        equations: Per-problem keys passed to ``get_k_eqs``, indexed in step
            with ``q``.
    """
    symbols = ['(', ')', '+', '-', '/', '*']
    tdata = []

    for k, raw_problem in enumerate(q):
        answers = get_k_eqs(equations[k])
        if not answers:
            continue
        answers = list(set(answers))

        # First preprocessing, tokenize slightly: detach trailing punctuation
        # from the word it is glued to.
        words = raw_problem.strip().split(" ")
        for i, word in enumerate(words):
            if word and word[-1] in (',', '.', '?'):
                words[i] = word[:-1] + " " + word[-1]
        problem = " " + ' '.join(words) + " "
        print(problem)

        # make story
        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        if not any(entry[1].num == 'x' for entry in sets):
            print("NO X WHY")
            continue
        # TODO look for 2 xes

        numlist = [(cleannum(s.num), s) for _, s in sets]
        numlist = [pair for pair in numlist if pair[0] != '']
        objs = {num: (0, s) for num, s in numlist}
        print(objs.items())

        reference = answers[0][1].split(" ")
        consts = [tok for tok in reference
                  if tok not in symbols and tok != '=']
        present = [tok for tok in consts if tok in objs]
        if consts != present:
            print(present, consts)
            print("missing thing")
            continue
        # 1 when the reference equation lists its constants in text order.
        order = int(consts == [pair[0] for pair in numlist])

        # The two sets adjacent to "=" in the reference equation. This only
        # depends on answers[0], so it is computed once per problem instead
        # of once per candidate.
        halves = " ".join(
            tok for tok in reference if tok not in symbols).split(" = ")
        sp = (objs[halves[0].split(" ")[-1]][1],
              objs[halves[1].split(" ")[0]][1])

        for j, eq in answers:
            print(j, eq)
            l, r = [side.strip().split(' ') for side in eq.split('=')]
            target = ('x', objs['x'])

            sides = []
            for compound in (l, r):
                while len(compound) > 1:
                    if "(" in compound:
                        # The last "(" and the first ")" after it delimit
                        # an innermost group.
                        open_idx = ((len(compound) - 1)
                                    - compound[::-1].index('('))
                        close_idx = open_idx + compound[open_idx:].index(")")
                        subeq = compound[open_idx + 1:close_idx]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = (compound[:open_idx] + [substr]
                                    + compound[close_idx + 1:])
                    else:
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    p, op, e = subeq
                    p = objs[p]
                    e = objs[e]
                    # Register the merged set under its textual form so the
                    # enclosing expression can look it up.
                    objs[substr] = (
                        0, makesets.combine(p[1], e[1], op.strip()))
                sides.append(objs[compound[0]])

            tdata.append(training(sides[0], sides[1], problem, story,
                                  target, j, order, sp))

    # `with` guarantees the data file is flushed and closed.
    with open("data/" + sys.argv[1][-1] + ".global.data", 'w') as f:
        for v in tdata:
            fields = [str(v[0])]
            fields.extend(str(i + 1) + ":" + str(val)
                          for i, val in enumerate(v[1:]))
            f.write(" ".join(fields) + " \n")
def make_eq(q):
    """Collect per-operator training examples from gold equations.

    ``parse(q)`` yields the problems and, per problem, its equations. Each
    equation is reduced innermost-group first; every ``(op, left, right)``
    merge plus the final ``"="`` comparison is handed to ``training`` and the
    returned examples are accumulated per operator. The result is pickled to
    ``data/gold_training.pickle``.

    NOTE(review): ``problematic`` is not defined in this function; it is
    presumably a module-level writable file object. Confirm before relying
    on the "no x" logging path.

    Args:
        q: Argument forwarded to ``parse``.
    """
    bigtexamples = {x: ([], []) for x in ["+", "*", '/', '-', '=']}
    wps, eqs = parse(q)

    for k, entry in enumerate(wps):
        if len(entry) == 0:
            continue
        problem = entry[0].lower()
        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        print(sets)

        # Drop sets that share a (non-None) idx, preferring to keep one whose
        # num contains a digit. Index-based loop because `sets` shrinks.
        i = 0
        while i < len(sets):
            dups = [y for y in sets
                    if y[1].idx is not None and y[1].idx == sets[i][1].idx]
            if len(dups) > 1:
                good = [y for y in dups
                        if any(ch.isdigit() for ch in y[1].num)]
                if good:
                    for extra in [y for y in dups if y != good[0]]:
                        sets.remove(extra)
                else:
                    # just pick 1
                    for extra in dups[1:]:
                        sets.remove(extra)
            i += 1

        if not any(s[1].num == 'x' for s in sets):
            problematic.write('no x :' + problem)
            continue
        # TODO look for 2 xes

        numlist = [(cleannum(s.num), s) for _, s in sets]
        numlist = [pair for pair in numlist if pair[0] != '']
        objs = {num: (0, s) for num, s in numlist}

        for j, eq in enumerate(eqs[k]):
            trips = []
            print(j, eq)
            l, r = [side.strip().split(' ') for side in eq.split('=')]
            # The single-token side is the target; the other is reduced.
            compound = r if len(l) == 1 else l
            simplex = l if len(l) == 1 else r
            target = (simplex[0], objs[simplex[0]])

            while len(compound) > 1:
                if "(" in compound:
                    # The last "(" and the first ")" after it delimit an
                    # innermost group.
                    open_idx = (len(compound) - 1) - compound[::-1].index('(')
                    close_idx = open_idx + compound[open_idx:].index(")")
                    subeq = compound[open_idx + 1:close_idx]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = (compound[:open_idx] + [substr]
                                + compound[close_idx + 1:])
                else:
                    subeq = compound[0:3]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = [substr] + compound[3:]
                p, op, e = subeq
                p = objs[p]
                e = objs[e]
                op = op.strip()
                trips.append((op, p, e))
                objs[substr] = (0, makesets.combine(p[1], e[1], op))

            # Keep the "=" triple in the same left/right order as the text.
            if simplex == l:
                trips.append(("=", objs[simplex[0]], objs[compound[0]]))
            else:
                trips.append(("=", objs[compound[0]], objs[simplex[0]]))

            t = training(trips, problem, target)
            for op in t:
                bigtexamples[op][0].extend(t[op][0])
                bigtexamples[op][1].extend(t[op][1])
                print(op, len(bigtexamples[op][0]))

    # `with` closes the pickle file even if dumping fails.
    with open('data/gold_training.pickle', 'wb') as f:
        pickle.dump(bigtexamples, f)
def dotrain():
    """Generate per-operator training examples and pickle them.

    Problems and answers are read from ``sys.argv[1]`` / ``sys.argv[2]`` when
    command-line arguments are present, otherwise from
    ``emnlp_noIrrelev_p.txt`` / ``emnlp_noIrrelev_a.txt``. For each problem,
    ``Solver`` proposes equations for the known answer; equations with "="
    as their second or second-to-last token are kept, preferring those
    where 'x' sits at the same position among the constants as in the text.
    Each kept equation is reduced innermost-group first and the resulting
    triples are passed to ``training``. Examples are accumulated per
    operator and dumped to ``data/dev_training.pickle``. Problems that
    cannot be used are logged to ``nogoodtrainproblems``.
    """
    if len(sys.argv) > 1:
        problem_path, answer_path = sys.argv[1], sys.argv[2]
    else:
        problem_path = "emnlp_noIrrelev_p.txt"
        answer_path = "emnlp_noIrrelev_a.txt"
    with open(problem_path) as f:
        wps = f.readlines()
    with open(answer_path) as f:
        answs = f.readlines()

    bigtexamples = {x: ([], []) for x in ["+", "*", '/', '-', '=']}
    replacements = {
        ' two ': ' 2 ',
        " three ": ' 3 ',
        ' four ': ' 4 ',
        ' five ': ' 5 ',
        ' six ': ' 6 ',
        ' seven ': ' 7 ',
        ' eight ': ' 8 ',
        ' nine ': ' 9 ',
        ' ten ': ' 10 ',
        ' eleven ': ' 11 ',
        ' week ': ' 7 days ',
        ' dozen ': ' 12 of ',
        ' dozens ': ' 12 ',
        ' twice ': ' 2 ',
    }
    symbols = ['+', '-', '/', '=', ')', '(', '*']

    with open('nogoodtrainproblems', 'w') as problematic:
        for k, raw_problem in enumerate(wps):
            print(k)
            problem = raw_problem.lower()
            for old, new in replacements.items():
                problem = problem.replace(old, new)

            story = nlp.parse(problem)
            numbs = makesets.makesets(story['sentences'])
            numlist = [(cleannum(s.num), s) for _, s in numbs]
            numlist = [pair for pair in numlist if pair[0] != '']
            names = {str(num) for num, _ in numlist}
            if 'x' not in names and 'x*' not in names:
                problematic.write('no x :' + problem)
                continue
            objs = {num: (0, s) for num, s in numlist}

            print('start solving')
            print(numlist)
            if len(numlist) < 2:
                problematic.write("not enough numbers : " + problem)
                continue
            ST = Solver([pair[0] for pair in numlist if pair[0] != 'x'])
            answers = ST.solveEquations(float(answs[k]))
            print('done solving')
            # Check for "no answers" before iterating them, so a None
            # result is skipped instead of raising.
            if not answers:
                continue

            # filter out where = in middle if simpler eq exists
            simpleranswers = [
                ans for ans in answers
                if ans.split(" ")[1] == '=' or ans.split(" ")[-2] == "="
            ]
            if not simpleranswers:
                print(answers)
                problematic.write("not simple : " + problem)
                continue
            answers = simpleranswers

            # Prefer equations whose 'x' occupies the same position among
            # the constants as it does in the problem text.
            answervals = [tok for tok in answers[0].split(" ")
                          if tok not in symbols]
            numvals = [pair[0] for pair in numlist if pair[0] in answervals]
            xidx = numvals.index("x")
            xrightanswers = [
                ans for ans in answers
                if [tok for tok in ans.split(" ")
                    if tok not in symbols].index('x') == xidx
            ]
            if xrightanswers:
                answers = xrightanswers

            for j, eq in enumerate(answers):
                trips = []
                print(j, eq)
                l, r = [side.strip().split(' ') for side in eq.split('=')]
                # Previously `l if len(r) == 1 else r`, which made compound
                # and simplex both the left side when each side was one
                # token, so the "=" example compared a set with itself.
                # Selecting on len(l) keeps every other case unchanged.
                compound = r if len(l) == 1 else l
                simplex = l if len(l) == 1 else r
                target = (simplex[0], objs[simplex[0]])

                while len(compound) > 1:
                    if "(" in compound:
                        # The last "(" and the first ")" after it delimit
                        # an innermost group.
                        open_idx = ((len(compound) - 1)
                                    - compound[::-1].index('('))
                        close_idx = open_idx + compound[open_idx:].index(")")
                        subeq = compound[open_idx + 1:close_idx]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = (compound[:open_idx] + [substr]
                                    + compound[close_idx + 1:])
                    else:
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    p, op, e = subeq
                    p = objs[p]
                    e = objs[e]
                    op = op.strip()
                    trips.append((op, p, e))
                    objs[substr] = (0, makesets.combine(p[1], e[1], op))

                # Keep the "=" triple in the same left/right order as the
                # equation text.
                if simplex == l:
                    trips.append(("=", objs[simplex[0]], objs[compound[0]]))
                else:
                    trips.append(("=", objs[compound[0]], objs[simplex[0]]))

                t = training(trips, problem, target)
                for op in t:
                    bigtexamples[op][0].extend(t[op][0])
                    bigtexamples[op][1].extend(t[op][1])
                    print(op, len(bigtexamples[op][0]))

    with open('data/dev_training.pickle', 'wb') as f:
        pickle.dump(bigtexamples, f)
def infer(q, a, cutoff, VERBOSE):
    """Score template equations per problem and emit global training vectors.

    Every equation produced by ``StringTemplate`` for a problem's numbers is
    scored by summing the ``compute`` scores of its merges. The ``cutoff``
    best-scoring equations are solved for 'x'; each distinct solution
    becomes one vector (label first, then features) written to
    ``data/single.global.data`` as ``<label> 1:<f1> 2:<f2> ...``.

    Args:
        q: Path of the problems file, one problem per line.
        a: Path of the answers file, one numeric answer per line.
        cutoff: Number of top-scoring equations to solve per problem.
        VERBOSE: When true, the user picks the problem index interactively
            on every iteration and set details are printed.
    """
    with open(q) as f:
        wps = f.readlines()
    with open(a) as f:
        answs = f.readlines()
    # The original opened this log in append mode without writing to it;
    # keep creating it, but do not leave the handle open.
    open('somethingWrongProblems', 'a').close()

    vectors = []
    replacements = {
        ' two ': ' 2 ',
        " three ": ' 3 ',
        ' four ': ' 4 ',
        ' five ': ' 5 ',
        ' six ': ' 6 ',
        ' seven ': ' 7 ',
        ' eight ': ' 8 ',
        ' nine ': ' 9 ',
        ' ten ': ' 10 ',
        ' eleven ': ' 11 ',
        ' twice ': ' 2 ',
    }

    for k in range(len(wps)):
        if VERBOSE:
            # Interactive mode: list all problems and let the user choose.
            for i in range(len(wps)):
                print(i, wps[i])
            k = int(input())
        print(k)
        problem = wps[k].lower()

        # First preprocessing, tokenize slightly: detach trailing punctuation.
        words = problem.strip().split(" ")
        for i, word in enumerate(words):
            if word and word[-1] in (',', '.', '?'):
                words[i] = word[:-1] + " " + word[-1]
        problem = " " + ' '.join(words) + " "
        print(problem)
        for old, new in replacements.items():
            problem = problem.replace(old, new)

        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])

        # REMOVE DUPS THIS IS BAD: keep only the first set for each num.
        i = 0
        while i < len(sets):
            current = sets[i]
            dups = [y for y in sets if y[1].num == current[1].num]
            if len(dups) > 1:
                for extra in dups[1:]:
                    sets.remove(extra)
            i += 1

        print("Sets detected: ")
        for entry in sets:
            entry[1].details()

        numlist = [(cleannum(s.num), s) for _, s in sets]
        numlist = [pair for pair in numlist if pair[0] != '']
        if VERBOSE:
            for _, s in numlist:
                s.details()
            input()

        # Strip '*' and '/' markers from number tokens. Index 0 is never
        # rewritten (each original branch skipped i == 0), so start at 1.
        for i in range(1, len(numlist)):
            token = numlist[i][0]
            cleaned = ''.join(ch for ch in token if ch not in ('*', '/'))
            if token[-1] == "*":
                numlist[i] = (cleaned, numlist[i][1])
                numlist[i][1].num = numlist[i][0]
            elif token[0] == "*":
                # A leading '*' also swaps the entry with its predecessor.
                numlist[i] = (cleaned, numlist[i][1])
                numlist[i - 1], numlist[i] = numlist[i], numlist[i - 1]
            elif token[-1] == "/":
                numlist[i] = (cleaned, numlist[i][1])

        objs = {num: (0, s) for num, s in numlist}
        if len(objs) < 2:
            continue
        if 'x' not in objs:
            continue
        integerproblem = all(
            float(pair[0]).is_integer() for pair in numlist if pair[0] != 'x')
        if VERBOSE:
            print(objs, numlist, [s.num for _, s in sets])

        ST = StringTemplate([pair[0] for pair in numlist], inf=True)
        scores = []
        equalsmatch = []
        contmatch = []
        for eq in ST.equations:
            thisscore = []
            # determine score for this eq
            l, r = [side.strip().split(' ')
                    for side in eq.toString().split('=')]
            if len(r) > 1 and len(l) > 1:
                # Both sides compound: record a fixed score and 'x'
                # placeholders so the three lists stay index-aligned with
                # ST.equations.
                scores.append(-0.2)
                equalsmatch.append('x')
                contmatch.append('x')
                continue
            if len(r) > 1:
                compound, target = r, l[0]
            else:
                compound, target = l, r[0]
            target = (target, objs[target])

            # NOTE(review): if both sides are single tokens this loop never
            # runs and `c` below is stale or unbound. Left as in the
            # original; confirm whether such equations can occur.
            while len(compound) > 1:
                if "(" in compound:
                    # The last "(" and the first ")" after it delimit an
                    # innermost group.
                    open_idx = (len(compound) - 1) - compound[::-1].index('(')
                    close_idx = open_idx + compound[open_idx:].index(")")
                    subeq = compound[open_idx + 1:close_idx]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = (compound[:open_idx] + [substr]
                                + compound[close_idx + 1:])
                else:
                    subeq = compound[0:3]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = [substr] + compound[3:]
                if substr in objs:
                    # Already scored for an earlier equation of this problem.
                    pute = objs[substr]
                else:
                    p, op, e = subeq
                    p = objs[p][1]
                    e = objs[e][1]
                    pute = compute(p, op.strip(), e, target, problem)
                    objs[substr] = pute
                if pute == -1:
                    exit()
                score, c = pute
                thisscore.append(score)

            equalsmatch.append(int(target[1][1].entity == c.entity))
            contmatch.append(int(target[1][1].container != c.container))
            scores.append(sum(thisscore))

        srt = sorted([(s, i) for i, s in enumerate(scores)], reverse=True)
        print('\n Top scoring 3 equations: ')
        for s, i in srt[:3]:
            print(s, ST.equations[i].toString())

        eqidxs = [pair[0] for pair in
                  sorted(enumerate(scores), key=lambda p: p[1], reverse=True)]
        seen = []
        for i in eqidxs[:cutoff]:
            ogeq = ST.equations[i].toString()
            if equalsmatch[i] == 'x':
                continue
            # Rewrite "lhs = rhs" as "lhs - (rhs)" so it can be solved as
            # an expression equal to zero.
            lhs_rhs = ogeq.split('=')
            eq = lhs_rhs[0] + '- (' + lhs_rhs[1] + ')'
            try:
                guess = solve(eq, 'x')[0]
            except Exception:
                # Unsolvable or malformed equation: skip it. (Was a bare
                # except, which also swallowed KeyboardInterrupt.)
                continue
            if guess in seen:
                continue
            seen.append(guess)
            # Comparing a complex solution raises; skip such guesses.
            try:
                bool(guess < 0)
            except Exception:
                continue
            answ = float(answs[k])

            # build training vector
            vec = [
                int(guess == answ),
                int(float(guess) < 0),
                int(integerproblem),
                # NOTE(review): this is a character index; confirm it
                # matches the spacing produced by toString().
                int(ogeq.index("=") == 1),
                # Previously compared the rewritten equation's last token
                # with a stale loop variable (a score), which was always
                # False. Test the original equation for a trailing 'x'.
                int(ogeq.strip().split(" ")[-1] == 'x'),
                equalsmatch[i],
                contmatch[i],
                int(guess.is_integer),
                # lexical items
                int("at first " in problem),
                int("start " in problem),
                int(" now " in problem),
                int(" total " in problem),
                int(" equally " in problem),
                int(" equal " in problem),
            ]
            vectors.append(vec)

    with open("data/single.global.data", 'w') as f:
        for v in vectors:
            fields = [str(v[0])]
            fields.extend(str(i + 1) + ":" + str(val)
                          for i, val in enumerate(v[1:]))
            f.write(" ".join(fields) + " \n")
def make_eq(q, a, equations):
    """Score candidate equations per problem and count top-1 hits.

    For each problem the candidates from
    ``utils.get_k_eqs(equations[k], g=True, a=True)`` are de-duplicated by
    equation string. Candidates whose constants appear in text order are
    scored as the product of the ``compute`` scores of every merge, times
    the score of the final "=" comparison. A problem counts as right when
    the best-scoring candidate has ``j == 1``.

    Args:
        q: Sequence of problem texts.
        a: Unused here; kept so existing callers keep working.
        equations: Per-problem keys for ``utils.get_k_eqs`` and
            ``utils.read_parse``, indexed in step with ``q``.

    Returns:
        ``(right, wrong)`` counts over the problems that were scored.
    """
    symbols = ['(', ')', '+', '-', '/', '*', '=']
    right = 0
    wrong = 0

    for k, raw_problem in enumerate(q):
        answers = utils.get_k_eqs(equations[k], g=True, a=True)
        if answers == []:
            continue
        # Keep the first candidate seen for each distinct equation string.
        seeneq = set()
        unique = []
        for cand in answers:
            if cand[1] not in seeneq:
                unique.append(cand)
                seeneq.add(cand[1])
        answers = list(set(unique))

        # First preprocessing, tokenize slightly
        problem = utils.preprocess_problem(raw_problem)
        print(problem)

        # make story
        story = utils.read_parse(int(equations[k]))
        sets = makesets.makesets(story['sentences'])
        for entry in sets:
            entry[1].details()

        if not any(entry[1].num == 'x' for entry in sets):
            print("NO X WHY")
            continue
        # TODO look for 2 xes

        numlist = [(utils.cleannum(s.num), s) for _, s in sets]
        numlist = [pair for pair in numlist if pair[0] != '']
        objs = {num: (0, s) for num, s in numlist}
        print(objs.items())

        consts = [tok for tok in answers[0][1].split(" ")
                  if tok not in symbols]
        present = [tok for tok in consts if tok in objs]
        if consts != present:
            print(present, consts)
            print("missing thing")
            continue
        # Every parsed number must also be used by the equation.
        if any(num not in consts for num in objs):
            print("missing thing")
            continue

        scores = []
        for j, eq, cons, guess in answers:
            eq_consts = [tok for tok in eq.split(" ") if tok not in symbols]
            order = int(eq_consts == [pair[0] for pair in numlist])
            if order == 0:
                continue
            l, r = [side.strip().split(' ') for side in eq.split('=')]
            target = ('x', objs['x'])

            sides = []
            thisscore = []
            for compound in (l, r):
                while len(compound) > 1:
                    if "(" in compound:
                        # The last "(" and the first ")" after it delimit
                        # an innermost group.
                        open_idx = ((len(compound) - 1)
                                    - compound[::-1].index('('))
                        close_idx = open_idx + compound[open_idx:].index(")")
                        subeq = compound[open_idx + 1:close_idx]
                        substr = "(" + ''.join(subeq) + ")"
                        # Build the new token list in ONE expression. The
                        # previous two-step form sliced the already
                        # truncated list, so everything after the closing
                        # parenthesis was dropped.
                        compound = (compound[:open_idx] + [substr]
                                    + compound[close_idx + 1:])
                    else:
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    p, op, e = subeq
                    p = objs[p]
                    e = objs[e]
                    op = op.strip()
                    pute = compute(p, op, e, target, problem, story, order)
                    objs[substr] = pute
                    if pute == -1:
                        exit()
                    score, _, _ = pute
                    thisscore.append(score)
                sides.append(objs[compound[0]])

            score = 1
            for s in thisscore:
                score *= s
            score *= compute(sides[0], '=', sides[1], target, problem,
                             story, order, score, cons)[0]
            scores.append((score, j, eq, guess))

        scores = sorted(scores, reverse=True)
        print(scores[:3])
        if not any(entry[1] == 1 for entry in scores):
            wrong += 1
            print("TOP SCORING NO CORRECT SOLUTION \nINCORRECT")
            continue
        # `scores` is non-empty here, otherwise the check above would have
        # skipped the problem.
        if scores[0][1] == 1:
            right += 1
            print("CORRECT")
        else:
            wrong += 1
            print("INCORRECT")

    return (right, wrong)
def make_eq(q, a, VERBOSE, TRAIN):
    """Pick one solver equation per problem and log it to ``topeq.txt``.

    Each problem is normalised and parsed into sets, then ``Solver``
    proposes equations that reach the known answer. The candidates are
    narrowed to "simple" forms, then to those whose 'x' position matches
    the text. One candidate is chosen at random and written as
    ``<k> : <equation>``. Unusable problems are logged to
    ``somethingWrongProblems``. When neither ``TRAIN`` nor ``VERBOSE`` is
    set, the surviving candidates are also dumped to ``whatever.out.txt``.

    Args:
        q: Sequence of problem texts.
        a: Sequence of answers (strings parseable by ``float``), aligned
            with ``q``.
        VERBOSE: When true, the user picks the problem index interactively
            on every iteration and set details are printed.
        TRAIN: Together with ``VERBOSE`` controls the candidate dump.
    """
    from contextlib import ExitStack

    wps = q
    answs = a
    replacements = {
        ' two ': ' 2 ',
        " three ": ' 3 ',
        ' four ': ' 4 ',
        ' five ': ' 5 ',
        ' six ': ' 6 ',
        ' seven ': ' 7 ',
        ' eight ': ' 8 ',
        ' nine ': ' 9 ',
        ' ten ': ' 10 ',
        ' eleven ': ' 11 ',
        ' twice ': ' 2 ',
    }
    symbols = ["/", "-", '+', '*', '=', '(', ')']

    # ExitStack closes every file opened below on exit, including the
    # conditional `out`, even when an exception escapes the loop.
    with ExitStack() as stack:
        out = None
        if not TRAIN and not VERBOSE:
            out = stack.enter_context(open("whatever.out.txt", 'w'))
        problematic = stack.enter_context(
            open('somethingWrongProblems', 'w'))
        topeq = stack.enter_context(open("topeq.txt", 'w'))

        for k in range(len(wps)):
            if VERBOSE:
                # Interactive mode: list all problems, let the user choose.
                for i in range(len(wps)):
                    print(i, wps[i])
                k = int(input())
            print(k)
            problem = wps[k].lower()

            # First preprocessing, tokenize slightly.
            words = problem.strip().split(" ")
            for i, word in enumerate(words):
                if word and word[-1] in (',', '.', '?'):
                    words[i] = word[:-1] + " " + word[-1]
            problem = " " + ' '.join(words) + " "
            print(problem)
            for old, new in replacements.items():
                problem = problem.replace(old, new)

            story = nlp.parse(problem)
            sets = makesets.makesets(story['sentences'])
            print(sets)

            # Drop sets that share a (non-None) idx, preferring to keep one
            # whose num contains a digit.
            i = 0
            while i < len(sets):
                dups = [y for y in sets
                        if y[1].idx is not None
                        and y[1].idx == sets[i][1].idx]
                if len(dups) > 1:
                    good = [y for y in dups
                            if any(ch.isdigit() for ch in y[1].num)]
                    if good:
                        for extra in [y for y in dups if y != good[0]]:
                            sets.remove(extra)
                    else:
                        # just pick 1
                        for extra in dups[1:]:
                            sets.remove(extra)
                i += 1

            xpositions = [i for i, s in enumerate(sets) if s[1].num == 'x']
            if not xpositions:
                problematic.write('no x :' + problem)
                continue
            # TODO look for 2 xes
            xidx = xpositions[0]

            # A 'dozen'/'bill' entity next to the unknown means the target
            # side of the equation may hold two values.
            twoToRight = False
            if xidx > 0:
                print(len(sets), xidx)
                if sets[xidx - 1][1].entity in ['dozen', 'bill']:
                    twoToRight = True
            if len(sets) - xidx > 1:
                if sets[xidx + 1][1].entity == 'dozen':
                    twoToRight = True
            if len(sets) - xidx < 3:
                if sets[xidx][1].entity == 'dozen':
                    twoToRight = True

            numlist = [(cleannum(s.num), s) for _, s in sets]
            numlist = [pair for pair in numlist if pair[0] != '']
            if VERBOSE:
                for _, s in numlist:
                    s.details()
                input()

            print('start solving')
            print(numlist)
            if len(numlist) < 2:
                problematic.write("not enough numbers : " + problem)
                continue
            values = [pair[0] for pair in numlist if pair[0] != 'x']
            print(values)
            ST = Solver(values)
            answers = ST.solveEquations(float(answs[k]))
            print(answs[k])
            if not answers:
                problematic.write("No answers : " + problem + "\n")
                problematic.write(str([pair[0] for pair in numlist]) + '\n')
                problematic.write(answs[k] + '\n')
                continue
            print('done solving')

            # if target has 2 entities, try eqs with = x op y format
            simpleranswers = None
            if twoToRight:
                try:
                    simpleranswers = [
                        ans for ans in answers
                        if ans.split(" ")[-4] == "="
                        and (ans.split(" ")[-3] == 'x'
                             or ans.split(' ')[-1] == 'x')
                    ]
                except IndexError:
                    # An equation shorter than four tokens; fall back to
                    # the plain filter below. (Was a bare `except: pass`.)
                    pass
            if not simpleranswers:
                simpleranswers = [
                    ans for ans in answers
                    if ans.split(" ")[1] == '=' or ans.split(" ")[-2] == "="
                ]

            # filter out where = in middle if simpler eq exists
            if not simpleranswers:
                problematic.write("not simple : " + problem + "\n")
                continue
            print(answers)
            answers = simpleranswers[:]

            values = [pair[0] for pair in numlist]
            xidx = values.index('x')
            print(simpleranswers)
            print(xidx)
            # Drop candidates whose 'x' is not at the same position among
            # the constants as in the text.
            for cand in simpleranswers:
                aspl = [tok for tok in cand.split(" ") if tok not in symbols]
                print(cand)
                print(aspl)
                print(values)
                aidx = aspl.index('x')
                print(aidx)
                if aidx != xidx:
                    print("removing ", cand)
                    answers.remove(cand)
            print(answers)
            if answers == []:
                # Nothing matched the position: keep all simple candidates.
                answers = simpleranswers
            print(answers)

            if out is not None:
                out.write(problem + '\n')
                out.write(answs[k] + "\n")
                out.write(str([pair[0] for pair in numlist]))
                out.write("\n")
                for ans in answers:
                    out.write(ans + "\n")
                out.write("___\n")

            # Prefer equations that end in "= <token>" when any exist.
            ending_eq = [ans for ans in answers if ans.split(" ")[-2] == "="]
            if ending_eq:
                answers = ending_eq
            c = randint(0, len(answers) - 1)
            answers = [answers[c]]
            topeq.write(str(k) + " : " + str(answers[0]) + "\n")
def dotrain():
    """Generate per-operator training examples and pickle them.

    Problems and answers are read from ``sys.argv[1]`` / ``sys.argv[2]`` when
    command-line arguments are present, otherwise from
    ``emnlp_noIrrelev_p.txt`` / ``emnlp_noIrrelev_a.txt``. For each problem,
    ``Solver`` proposes equations for the known answer; equations with "="
    as their second or second-to-last token are kept, preferring those
    where 'x' sits at the same position among the constants as in the text.
    Each kept equation is reduced innermost-group first and the resulting
    triples are passed to ``training``. Examples are accumulated per
    operator and dumped to ``data/dev_training.pickle``. Problems that
    cannot be used are logged to ``nogoodtrainproblems``.
    """
    if len(sys.argv) > 1:
        problem_path, answer_path = sys.argv[1], sys.argv[2]
    else:
        problem_path = "emnlp_noIrrelev_p.txt"
        answer_path = "emnlp_noIrrelev_a.txt"
    with open(problem_path) as f:
        wps = f.readlines()
    with open(answer_path) as f:
        answs = f.readlines()

    bigtexamples = {x: ([], []) for x in ["+", "*", '/', '-', '=']}
    replacements = {
        ' two ': ' 2 ',
        " three ": ' 3 ',
        ' four ': ' 4 ',
        ' five ': ' 5 ',
        ' six ': ' 6 ',
        ' seven ': ' 7 ',
        ' eight ': ' 8 ',
        ' nine ': ' 9 ',
        ' ten ': ' 10 ',
        ' eleven ': ' 11 ',
        ' week ': ' 7 days ',
        ' dozen ': ' 12 of ',
        ' dozens ': ' 12 ',
        ' twice ': ' 2 ',
    }
    symbols = ['+', '-', '/', '=', ')', '(', '*']

    with open('nogoodtrainproblems', 'w') as problematic:
        for k, raw_problem in enumerate(wps):
            print(k)
            problem = raw_problem.lower()
            for old, new in replacements.items():
                problem = problem.replace(old, new)

            story = nlp.parse(problem)
            numbs = makesets.makesets(story['sentences'])
            numlist = [(cleannum(s.num), s) for _, s in numbs]
            numlist = [pair for pair in numlist if pair[0] != '']
            names = {str(num) for num, _ in numlist}
            if 'x' not in names and 'x*' not in names:
                problematic.write('no x :' + problem)
                continue
            objs = {num: (0, s) for num, s in numlist}

            print('start solving')
            print(numlist)
            if len(numlist) < 2:
                problematic.write("not enough numbers : " + problem)
                continue
            ST = Solver([pair[0] for pair in numlist if pair[0] != 'x'])
            answers = ST.solveEquations(float(answs[k]))
            print('done solving')
            # Check for "no answers" before iterating them, so a None
            # result is skipped instead of raising.
            if not answers:
                continue

            # filter out where = in middle if simpler eq exists
            simpleranswers = [
                ans for ans in answers
                if ans.split(" ")[1] == '=' or ans.split(" ")[-2] == "="
            ]
            if not simpleranswers:
                print(answers)
                problematic.write("not simple : " + problem)
                continue
            answers = simpleranswers

            # Prefer equations whose 'x' occupies the same position among
            # the constants as it does in the problem text.
            answervals = [tok for tok in answers[0].split(" ")
                          if tok not in symbols]
            numvals = [pair[0] for pair in numlist if pair[0] in answervals]
            xidx = numvals.index("x")
            xrightanswers = [
                ans for ans in answers
                if [tok for tok in ans.split(" ")
                    if tok not in symbols].index('x') == xidx
            ]
            if xrightanswers:
                answers = xrightanswers

            for j, eq in enumerate(answers):
                trips = []
                print(j, eq)
                l, r = [side.strip().split(' ') for side in eq.split('=')]
                # Previously `l if len(r)==1 else r`, which made compound
                # and simplex both the left side when each side was one
                # token, so the "=" example compared a set with itself.
                # Selecting on len(l) keeps every other case unchanged.
                compound = r if len(l) == 1 else l
                simplex = l if len(l) == 1 else r
                target = (simplex[0], objs[simplex[0]])

                while len(compound) > 1:
                    if "(" in compound:
                        # The last "(" and the first ")" after it delimit
                        # an innermost group.
                        open_idx = ((len(compound) - 1)
                                    - compound[::-1].index('('))
                        close_idx = open_idx + compound[open_idx:].index(")")
                        subeq = compound[open_idx + 1:close_idx]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = (compound[:open_idx] + [substr]
                                    + compound[close_idx + 1:])
                    else:
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    p, op, e = subeq
                    p = objs[p]
                    e = objs[e]
                    op = op.strip()
                    trips.append((op, p, e))
                    objs[substr] = (0, makesets.combine(p[1], e[1], op))

                # Keep the "=" triple in the same left/right order as the
                # equation text.
                if simplex == l:
                    trips.append(("=", objs[simplex[0]], objs[compound[0]]))
                else:
                    trips.append(("=", objs[compound[0]], objs[simplex[0]]))

                t = training(trips, problem, target)
                for op in t:
                    bigtexamples[op][0].extend(t[op][0])
                    bigtexamples[op][1].extend(t[op][1])
                    print(op, len(bigtexamples[op][0]))

    with open('data/dev_training.pickle', 'wb') as f:
        pickle.dump(bigtexamples, f)
def make_eq(q, a, equations):
    """Gather local (per-operator) training examples and pickle them.

    For every problem, the equations labelled 1 by ``utils.get_k_eqs`` are
    reduced on both sides, innermost group first; each merge is recorded as
    an ``(op, left, right)`` triple and the triples are passed to
    ``training``. The accumulated examples are pickled to
    ``data/<last character of sys.argv[1]>.local.training``.

    Args:
        q: Sequence of problem texts.
        a: Not used by this function.
        equations: Per-problem keys for ``utils.read_parse`` and
            ``utils.get_k_eqs``, indexed in step with ``q``.
    """
    bigtexamples = {}
    for symbol in ["+", "*", '/', '-', '=']:
        bigtexamples[symbol] = ([], [])
    structural = ('(', ')', '+', '-', '/', '*', '=')

    for k in range(len(q)):
        # First preprocessing, tokenize slightly
        problem = utils.preprocess_problem(q[k])
        print(k)
        print(problem)
        story = utils.read_parse(int(equations[k]))

        answers = []
        for candidate in utils.get_k_eqs(equations[k]):
            if candidate[0] == 1:
                answers.append(candidate[1])
        if answers == []:
            continue
        answers = list(set(answers))
        print(story["sentences"][0]["text"])
        print(answers)

        # make story
        sets = makesets.makesets(story['sentences'])
        unknowns = [pos for pos, entry in enumerate(sets)
                    if entry[1].num == 'x']
        if not unknowns:
            print("NO X WHY")
            continue

        numlist = []
        for _, parsed in sets:
            cleaned = utils.cleannum(parsed.num)
            if cleaned != '':
                numlist.append((cleaned, parsed))
        objs = {}
        for cleaned, parsed in numlist:
            objs[cleaned] = (0, parsed)
        print(objs.items())

        consts = [tok for tok in answers[0].split(" ")
                  if tok not in structural]
        present = [tok for tok in consts if tok in objs]
        if present != consts:
            # A constant of the gold equation has no parsed set: abort.
            print(present, consts)
            print("missing thing")
            exit()

        for j, eq in enumerate(answers):
            trips = []
            print(j, eq)
            halves = [half.strip().split(' ') for half in eq.split('=')]
            l, r = halves
            target = ('x', objs['x'])

            for tokens in (l, r):
                while len(tokens) > 1:
                    if "(" not in tokens:
                        # No parentheses left: fold the first three tokens.
                        subeq = tokens[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        tokens = [substr] + tokens[3:]
                    else:
                        # The last "(" and the first ")" after it delimit
                        # an innermost group.
                        start = (len(tokens) - 1) - tokens[::-1].index('(')
                        stop = start + tokens[start:].index(")")
                        subeq = tokens[start + 1:stop]
                        substr = "(" + ''.join(subeq) + ")"
                        head = tokens[:start]
                        tail = tokens[stop + 1:]
                        tokens = head + [substr] + tail
                    left_name, op, right_name = subeq
                    left = objs[left_name]
                    right = objs[right_name]
                    op = op.strip()
                    trips.append((op, left, right))
                    objs[substr] = (0, makesets.combine(left[1], right[1], op))

            t = training(trips, problem, story, target)
            for op in t:
                positives, negatives = t[op][0], t[op][1]
                bigtexamples[op][0].extend(positives)
                bigtexamples[op][1].extend(negatives)

    out_path = 'data/' + sys.argv[1][-1] + ".local.training"
    with open(out_path, 'wb') as f:
        pickle.dump(bigtexamples, f)
def make_eq(q, a, VERBOSE, TRAIN):
    """Derive solver equations per problem and, when training, pickle examples.

    Problems are read from file ``q`` and answers from file ``a``. For each
    problem ``Solver`` proposes equations reaching the known answer. They
    are narrowed to forms with "=" as second or second-to-last token, then
    to those whose 'x' position matches the text. When neither ``TRAIN``
    nor ``VERBOSE`` is set, the surviving candidates are written to
    ``<q>.out.txt``. With ``TRAIN`` set, one candidate is picked at random
    and reduced innermost-group first. Its triples are passed to
    ``training`` and the accumulated examples are pickled to
    ``data/<OUT>.training``. ``OUT`` is a name from the enclosing module.

    Args:
        q: Path of the problems file, one problem per line.
        a: Path of the answers file, one numeric answer per line.
        VERBOSE: When true, the user picks the problem index interactively
            on every iteration and the run pauses for input.
        TRAIN: When true, training examples are generated and pickled.
    """
    from contextlib import ExitStack

    bigtexamples = {x: ([], []) for x in ["+", "*", '/', '-', '=']}
    with open(q) as f:
        wps = f.readlines()
    with open(a) as f:
        answs = f.readlines()
    replacements = {
        ' two ': ' 2 ',
        " three ": ' 3 ',
        ' four ': ' 4 ',
        ' five ': ' 5 ',
        ' six ': ' 6 ',
        ' seven ': ' 7 ',
        ' eight ': ' 8 ',
        ' nine ': ' 9 ',
        ' ten ': ' 10 ',
        ' eleven ': ' 11 ',
        ' twice ': ' 2 ',
    }
    symbols = ["/", "-", '+', '*', '=', '(', ')']

    # ExitStack closes both logs on exit, including the conditional `out`,
    # even when an exception escapes the loop.
    with ExitStack() as stack:
        out = None
        if not TRAIN and not VERBOSE:
            out = stack.enter_context(open(q + ".out.txt", 'w'))
        problematic = stack.enter_context(
            open('somethingWrongProblems', 'w'))

        for k in range(len(wps)):
            if VERBOSE:
                # Interactive mode: list all problems, let the user choose.
                for i in range(len(wps)):
                    print(i, wps[i])
                k = int(input())
            print(k)
            problem = wps[k].lower()

            # First preprocessing, tokenize slightly.
            words = problem.strip().split(" ")
            for i, word in enumerate(words):
                if word and word[-1] in (',', '.', '?'):
                    words[i] = word[:-1] + " " + word[-1]
            problem = " " + ' '.join(words) + " "
            print(problem)
            for old, new in replacements.items():
                problem = problem.replace(old, new)

            story = nlp.parse(problem)
            sets = makesets.makesets(story['sentences'])
            print(sets)

            # Drop sets that share a (non-None) idx, preferring to keep one
            # whose num contains a digit.
            i = 0
            while i < len(sets):
                dups = [y for y in sets
                        if y[1].idx is not None
                        and y[1].idx == sets[i][1].idx]
                if len(dups) > 1:
                    good = [y for y in dups
                            if any(ch.isdigit() for ch in y[1].num)]
                    if good:
                        for extra in [y for y in dups if y != good[0]]:
                            sets.remove(extra)
                    else:
                        # just pick 1
                        for extra in dups[1:]:
                            sets.remove(extra)
                i += 1

            if not any(s[1].num == 'x' for s in sets):
                problematic.write('no x :' + problem)
                continue
            # TODO look for 2 xes

            numlist = [(cleannum(s.num), s) for _, s in sets]
            numlist = [pair for pair in numlist if pair[0] != '']
            if VERBOSE:
                for _, s in numlist:
                    s.details()
                input()
            objs = {num: (0, s) for num, s in numlist}

            print('start solving')
            print(numlist)
            if len(numlist) < 2:
                problematic.write("not enough numbers : " + problem)
                continue
            values = [pair[0] for pair in numlist if pair[0] != 'x']
            print(values)
            ST = Solver(values)
            answers = ST.solveEquations(float(answs[k]))
            if not answers:
                problematic.write("No answers : " + problem + "\n")
                problematic.write(str([pair[0] for pair in numlist]) + '\n')
                problematic.write(answs[k] + '\n')
                continue
            print('done solving')

            # filter out where = in middle if simpler eq exists
            simpleranswers = [
                ans for ans in answers
                if ans.split(" ")[1] == '=' or ans.split(" ")[-2] == "="
            ]
            if not simpleranswers:
                problematic.write("not simple : " + problem + "\n")
                continue
            answers = simpleranswers[:]

            values = [pair[0] for pair in numlist]
            xidx = values.index('x')
            print(xidx)
            # Drop candidates whose 'x' is not at the same position among
            # the constants as in the text.
            for cand in simpleranswers:
                aspl = [tok for tok in cand.split(" ") if tok not in symbols]
                print(cand)
                print(aspl)
                print(values)
                aidx = aspl.index('x')
                print(aidx)
                if aidx != xidx:
                    print("removing ", cand)
                    answers.remove(cand)
            print(answers)
            if answers == []:
                # Nothing matched the position: keep all simple candidates.
                answers = simpleranswers
            print(answers)

            if out is not None:
                out.write(problem + '\n')
                out.write(answs[k] + "\n")
                out.write(str([pair[0] for pair in numlist]))
                out.write("\n")
                for ans in answers:
                    out.write(ans + "\n")
                out.write("___\n")

            if VERBOSE:
                input()
            if not TRAIN:
                continue

            # Train on a single, randomly chosen candidate.
            eq = answers[randint(0, len(answers) - 1)]
            trips = []
            print(0, eq)
            l, r = [side.strip().split(' ') for side in eq.split('=')]
            # The single-token side is the target; the other is reduced.
            compound = r if len(l) == 1 else l
            simplex = l if len(l) == 1 else r
            target = (simplex[0], objs[simplex[0]])

            while len(compound) > 1:
                if "(" in compound:
                    # The last "(" and the first ")" after it delimit an
                    # innermost group.
                    open_idx = (len(compound) - 1) - compound[::-1].index('(')
                    close_idx = open_idx + compound[open_idx:].index(")")
                    subeq = compound[open_idx + 1:close_idx]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = (compound[:open_idx] + [substr]
                                + compound[close_idx + 1:])
                else:
                    subeq = compound[0:3]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = [substr] + compound[3:]
                p, op, e = subeq
                p = objs[p]
                e = objs[e]
                op = op.strip()
                trips.append((op, p, e))
                objs[substr] = (0, makesets.combine(p[1], e[1], op))

            # Keep the "=" triple in the same left/right order as the text.
            if simplex == l:
                trips.append(("=", objs[simplex[0]], objs[compound[0]]))
            else:
                trips.append(("=", objs[compound[0]], objs[simplex[0]]))

            t = training(trips, problem, target)
            for op in t:
                bigtexamples[op][0].extend(t[op][0])
                bigtexamples[op][1].extend(t[op][1])
                print(op, len(bigtexamples[op][0]))

    if TRAIN:
        with open('data/' + OUT + ".training", 'wb') as f:
            pickle.dump(bigtexamples, f)
def _fold_innermost(tokens):
    """Collapse one ``operand op operand`` triple of *tokens* into one token.

    If the token list contains "(", the group that starts at the last "("
    and ends at the first ")" after it is folded; otherwise the first three
    tokens are folded.

    Returns:
        A ``(triple, label, reduced)`` tuple: the tokens that were folded,
        the string ``"(" + joined triple + ")"`` that replaces them, and the
        new, shorter token list.
    """
    if "(" in tokens:
        open_idx = (len(tokens) - 1) - tokens[::-1].index('(')
        close_idx = open_idx + tokens[open_idx:].index(")")
        triple = tokens[open_idx + 1:close_idx]
        label = "(" + ''.join(triple) + ")"
        # Head, label and tail are all sliced from the same, unmodified list.
        # Slicing the tail after re-binding the list to the shortened head
        # would silently drop every token that follows the closing ")".
        reduced = tokens[:open_idx] + [label] + tokens[close_idx + 1:]
    else:
        triple = tokens[0:3]
        label = "(" + ''.join(triple) + ")"
        reduced = [label] + tokens[3:]
    return triple, label, reduced


def make_eq(q, a, equations):
    """Build training rows from candidate equations and write them to disk.

    For every problem the candidate equations are fetched with
    ``utils.get_k_eqs``, reduced bottom-up with ``compute`` and turned into a
    row by ``training``.  All rows are written to
    ``data/<c>.global.data`` where ``<c>`` is the last character of
    ``sys.argv[1]``; each row is ``v[0]`` followed by 1-indexed
    ``index:value`` pairs.

    Args:
        q: Indexable collection of raw problem texts.
        a: Unused; kept for interface compatibility.
        equations: Per-problem identifiers, passed to ``utils.get_k_eqs`` and
            (converted with ``int``) to ``utils.read_parse``.
    """
    non_operands = ('(', ')', '+', '-', '/', '*', '=')
    tdata = []
    for k in range(len(q)):
        print(k, equations[k])
        answers = utils.get_k_eqs(equations[k], g=True)
        good = list(set([x for x in answers if x[0] == 1]))
        # Keep at most as many 0-labelled candidates as 1-labelled ones.
        bad = list(set([x for x in answers if x[0] == 0]))[:len(good)]
        answers = list(set(good + bad))
        if not answers:
            continue

        # First preprocessing, tokenize slightly
        problem = utils.preprocess_problem(q[k])
        print(problem)
        story = utils.read_parse(int(equations[k]))
        sets = makesets.makesets(story['sentences'])

        # A problem without an unknown cannot be trained on.
        # TODO look for 2 xes
        if not [entry for entry in sets if entry[1].num == 'x']:
            print("NO X WHY")
            continue

        numlist = [(utils.cleannum(s.num), s) for _, s in sets]
        numlist = [pair for pair in numlist if pair[0] != '']
        objs = {num: (0, s) for num, s in numlist}
        nums_in_order = [pair[0] for pair in numlist]

        # Every operand of the first candidate must be a known quantity.
        consts = [t for t in answers[0][1].split(" ") if t not in non_operands]
        present = [t for t in consts if t in objs]
        if present != consts:
            print(present, consts)
            print("missing thing")
            continue

        for j, eq, cons in answers:
            operands = [t for t in eq.split(" ") if t not in non_operands]
            # Only equations whose operands appear in text order are used.
            order = int(operands == nums_in_order)
            if order == 0:
                continue
            print(j, eq)
            l, r = [side.strip().split(' ') for side in eq.split('=')]
            target = ('x', objs['x'])

            # Reduce each side to a single token, innermost group first.
            sides = []
            thisscore = []
            for compound in (l, r):
                while len(compound) > 1:
                    subeq, substr, compound = _fold_innermost(compound)
                    p, op, e = subeq
                    pute = compute(objs[p], op.strip(), objs[e], target,
                                   problem, story, order)
                    objs[substr] = pute
                    if pute == -1:
                        exit()
                    score, c, vals = pute
                    thisscore.append(score)
                sides.append(objs[compound[0]])

            score = 1
            for s in thisscore:
                score *= s
            tdata.append(
                training(sides[0], sides[1], problem, story, target, j, order,
                         score, cons))

    with open("data/" + sys.argv[1][-1] + ".global.data", 'w') as f:
        for v in tdata:
            f.write(str(v[0]) + " ")
            for i, j in enumerate(v[1:]):
                f.write(str(i + 1) + ":" + str(j) + " ")
            f.write("\n")
def make_eq(q, a, equations):
    """Score candidate equations per problem and count top-ranked hits.

    Candidates come from ``javad_train_local.get_k_eqs``; only those whose
    second-to-last token is "=" and that have "x" as last or third-to-last
    token are kept.  Each surviving candidate is reduced with ``compute`` and
    the product of the step scores (times the score of the final "=" step)
    ranks it.

    Args:
        q: Indexable collection of problem texts.
        a: Unused by the live code path.
        equations: Per-problem identifiers handed to ``get_k_eqs``.

    Returns:
        ``(right, wrong)`` counters over the problems that were scored.
    """
    non_operands = ['(', ')', '+', '-', '/', '*', '=', 'x']
    paren_and_ops = ['(', ')', '+', '-', '/', '*']
    n_right = 0
    n_wrong = 0

    for idx in range(len(q)):
        candidates = javad_train_local.get_k_eqs(equations[idx], g=True, a=True)
        candidates = [c for c in candidates if c[1].split()[-2] == '=']
        candidates = [
            c for c in candidates
            if c[1].split()[-1] == 'x' or c[1].split()[-3] == 'x'
        ]
        if not candidates:
            continue

        # Keep the first candidate for each distinct equation string.
        firsts = []
        seen_eqs = []
        for cand in candidates:
            if cand[1] in seen_eqs:
                continue
            firsts.append(cand)
            seen_eqs.append(cand[1])
        candidates = list(set(firsts))

        # First preprocessing, tokenize slightly
        words = q[idx].strip().split(" ")
        for pos, word in enumerate(words):
            if word and word[-1] in [',', '.', '?']:
                words[pos] = word[:-1] + " " + word[-1]
        problem = " " + ' '.join(words) + " "
        print(problem)

        # make story
        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        for entry in sets:
            entry[1].details()

        # TODO look for 2 xes
        if not [entry for entry in sets if entry[1].num == 'x']:
            print("NO X WHY")
            continue

        numlist = [(cleannum(s.num), s) for _, s in sets]
        numlist = [pair for pair in numlist if pair[0] != '']
        objs = {num: (0, s) for num, s in numlist}
        print(objs.items())

        reference = [
            t for t in candidates[0][1].split(" ") if t not in non_operands
        ]
        present = [t for t in reference if t in objs]
        if reference != present:
            print(present, reference)
            print("missing thing")
            continue

        scores = []
        for j, eq, cons, guess in candidates:
            operands = [t for t in eq.split(" ") if t not in non_operands]
            in_text_order = [n for n, _ in numlist if n in operands]
            order = int(operands == in_text_order)
            if order == 0:
                continue

            l, r = [side.strip().split(' ') for side in eq.split('=')]

            ref_sides = " ".join(
                [t for t in candidates[0][1].split(" ")
                 if t not in paren_and_ops]).split(" = ")
            # Not used further down; the two lookups are retained because a
            # missing key raises KeyError here.
            sp = (objs[ref_sides[0].split(" ")[-1]],
                  objs[ref_sides[1].split(" ")[0]])

            target = ('x', objs['x'])

            # find innermost parens?
            sides = []
            thisscore = []
            for compound in (l, r):
                while len(compound) > 1:
                    if "(" in compound:
                        start = max(n for n, tok in enumerate(compound)
                                    if tok == '(')
                        stop = start + compound[start:].index(")")
                        subeq = compound[start + 1:stop]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = (compound[:start] + [substr]
                                    + compound[stop + 1:])
                    else:
                        subeq = compound[:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    left, op, right_operand = subeq
                    pute = compute(objs[left], op.strip(), objs[right_operand],
                                   target, problem, story, order)
                    objs[substr] = pute
                    if pute == -1:
                        exit()
                    step_score, c, vals = pute
                    thisscore.append(step_score)
                sides.append(objs[compound[0]])

            score = 1
            for step_score in thisscore:
                score *= step_score
            score *= compute(sides[0], '=', sides[1], target, problem, story,
                             order, score, cons)[0]
            scores.append((score, j, eq, guess))

        scores.sort(reverse=True)
        top_three = scores[:3]
        if not [s for s in top_three if s[1] == 1]:
            n_wrong += 1
            print(top_three)
            print("TOP SCORING NO CORRECT SOLUTION \nINCORRECT")
            continue
        print(top_three)

        # A correct candidate is among the top three, so ``scores`` is
        # non-empty here; only the very best one decides the outcome.
        if scores[0][1] == 1:
            n_right += 1
            print("CORRECT")
        else:
            n_wrong += 1
            print("INCORRECT")

    return (n_right, n_wrong)
for k in range(len(wps)): if VERBOSE: for i in range(len(wps)): print(i,wps[i]) k = int(input()) print(wps[k]) problem = wps[k].lower() for r in replacements: problem = problem.replace(r,replacements[r]) #extract numbers: problem = ' '.join([x.replace(",","") for x in problem.split()]) story = nlp.parse(problem) numbs = makesets.makesets(story['sentences']) numlist = [(cleannum(v.num),v) for k,v in numbs] numlist = [x for x in numlist if x[0]!=''] allnumbs = {str(k):v for k,v in numlist} for v,x in numlist: x.details() constraints = [] for i in range(len(numlist)): if numlist[i][0][-1] == "*": if i==0:continue constraints.append(numlist[i-1][0]+" * "+numlist[i][0][:-1]) numlist[i] = (''.join([x for x in numlist[i][0] if x not in ['*','/']]),numlist[i][1]) numlist[i][1].num = numlist[i][0] elif numlist[i][0][0] == "*": if i==0:continue
def make_eq(q, a, equations):
    """Collect per-operator training examples and pickle them.

    For each problem, the equations labelled 1 by ``utils.get_k_eqs`` are
    reduced one ``operand op operand`` step at a time; every step is recorded
    as a triple and handed to ``training``.  The accumulated examples are
    pickled to ``data/<c>.local.training`` where ``<c>`` is the last character
    of ``sys.argv[1]``.

    Args:
        q: Indexable collection of raw problem texts.
        a: Unused; kept for interface compatibility.
        equations: Per-problem identifiers, passed to ``utils.get_k_eqs`` and
            (converted with ``int``) to ``utils.read_parse``.
    """
    bigtexamples = {}
    for symbol in ["+", "*", '/', '-', '=']:
        bigtexamples[symbol] = ([], [])
    non_operands = ['(', ')', '+', '-', '/', '*', '=']

    for k in range(len(q)):
        # First preprocessing, tokenize slightly
        problem = utils.preprocess_problem(q[k])
        print(k)
        print(problem)
        story = utils.read_parse(int(equations[k]))
        labelled = utils.get_k_eqs(equations[k])
        answers = list(set([item[1] for item in labelled if item[0] == 1]))
        if not answers:
            continue
        print(story["sentences"][0]["text"])
        print(answers)

        # make story
        sets = makesets.makesets(story['sentences'])
        if not [entry for entry in sets if entry[1].num == 'x']:
            print("NO X WHY")
            continue

        numlist = [(utils.cleannum(s.num), s) for _, s in sets]
        numlist = [pair for pair in numlist if pair[0] != '']
        objs = {num: (0, s) for num, s in numlist}
        print(objs.items())

        consts = [t for t in answers[0].split(" ") if t not in non_operands]
        present = [t for t in consts if t in objs]
        if present != consts:
            print(present, consts)
            print("missing thing")
            exit()

        for j, eq in enumerate(answers):
            trips = []
            print(j, eq)
            l, r = [side.strip().split(' ') for side in eq.split('=')]
            target = ('x', objs['x'])

            # find innermost parens?
            for compound in (l, r):
                while len(compound) > 1:
                    if "(" in compound:
                        start = max(n for n, tok in enumerate(compound)
                                    if tok == '(')
                        stop = start + compound[start:].index(")")
                        subeq = compound[start + 1:stop]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = (compound[:start] + [substr]
                                    + compound[stop + 1:])
                    else:
                        subeq = compound[:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]

                    left, op, right = subeq
                    left = objs[left]
                    right = objs[right]
                    op = op.strip()
                    trips.append((op, left, right))
                    pute = (0, makesets.combine(left[1], right[1], op))
                    objs[substr] = pute
                    # NOTE(review): ``pute`` is a tuple at this point, so this
                    # comparison with -1 cannot be true.
                    if pute == -1:
                        exit()

            t = training(trips, problem, story, target)
            for op in t:
                bucket = bigtexamples[op]
                bucket[0].extend(t[op][0])
                bucket[1].extend(t[op][1])

    with open('data/' + sys.argv[1][-1] + ".local.training", 'wb') as f:
        pickle.dump(bigtexamples, f)
def infer(q, a, VERBOSE):
    """Guess an equation for every word problem and print accuracy counters.

    Each problem is lower-cased, lightly tokenized, parsed with ``nlp.parse``
    and turned into quantity sets.  Every equation produced by
    ``StringTemplate`` is scored with ``compute``; the best-scoring equation
    whose solution passes the sign/integer filters is compared with the
    expected answer.

    Args:
        q: Path of a text file with one problem per line.
        a: Path of a text file with one numeric answer per line, aligned
            with ``q``.
        VERBOSE: When truthy, list all problems, read the index of the
            problem to run from stdin, and pause for input between steps.
    """
    with open(q) as problem_file:
        wps = problem_file.readlines()
    with open(a) as answer_file:
        answs = answer_file.readlines()
    # Nothing is written to this log here; touching it keeps the
    # create-if-missing side effect without leaking an open handle.
    open('somethingWrongProblems', 'a').close()

    # [correct, incorrect] operator counts for +, -, * and /.
    ar = [0, 0]
    sr = [0, 0]
    mr = [0, 0]
    dr = [0, 0]
    right = 0
    guesses = 0
    wrong = []
    multiops = 0
    multiopsright = 0
    # NOTE(review): ``multi`` is read further down but never assigned in this
    # function.  Use a module-level ``multi`` if one exists, otherwise fall
    # back to False instead of raising NameError on the first correct answer.
    multi = globals().get('multi', False)
    replacements = {
        ' two ': ' 2 ',
        " three ": ' 3 ',
        ' four ': ' 4 ',
        ' five ': ' 5 ',
        ' six ': ' 6 ',
        ' seven ': ' 7 ',
        ' eight ': ' 8 ',
        ' nine ': ' 9 ',
        ' ten ': ' 10 ',
        ' eleven ': ' 11 ',
        ' twice ': ' 2 '
    }

    def strip_marks(text):
        """Return *text* without any '*' or '/' characters."""
        return ''.join([ch for ch in text if ch not in ['*', '/']])

    for k in range(len(wps)):
        if VERBOSE:
            for i, wp in enumerate(wps):
                print(i, wp)
            k = int(input())
        print(k)
        problem = wps[k].lower()

        # First preprocessing, tokenize slightly
        words = problem.strip().split(" ")
        for i, word in enumerate(words):
            if word and word[-1] in [',', '.', '?']:
                words[i] = word[:-1] + " " + word[-1]
        problem = " " + ' '.join(words) + " "
        print(problem)
        for r in replacements:
            problem = problem.replace(r, replacements[r])

        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])

        # REMOVE DUPS THIS IS BAD:
        i = 0
        while i < len(sets):
            x = sets[i]
            dups = [y for y in sets if y[1].num == x[1].num]
            if len(dups) > 1:
                for x in dups[1:]:
                    sets.remove(x)
            i += 1

        print("Sets detected: ")
        for x in sets:
            x[1].details()

        numlist = [(cleannum(s.num), s) for _, s in sets]
        numlist = [pair for pair in numlist if pair[0] != '']
        if VERBOSE:
            for _, s in numlist:
                s.details()
            input()

        # Numbers carrying a '*' or '/' marker become substring constraints
        # that a candidate equation has to contain.
        constraints = []
        for i in range(len(numlist)):
            if numlist[i][0][-1] == "*":
                if i == 0:
                    continue
                constraints.append(numlist[i - 1][0] + " * " +
                                   numlist[i][0][:-1])
                numlist[i] = (strip_marks(numlist[i][0]), numlist[i][1])
                numlist[i][1].num = numlist[i][0]
            elif numlist[i][0][0] == "*":
                if i == 0:
                    continue
                numlist[i] = (strip_marks(numlist[i][0]), numlist[i][1])
                numlist[i - 1], numlist[i] = numlist[i], numlist[i - 1]
                constraints.append(numlist[i - 1][0] + " * " +
                                   numlist[i][0][1:])
            elif numlist[i][0][-1] == "/":
                if i == 0:
                    continue
                constraints.append(" / " + numlist[i][0][:-1])
                numlist[i] = (strip_marks(numlist[i][0]), numlist[i][1])

        objs = {num: (0, s) for num, s in numlist}
        if len(objs) < 2:
            wrong.append(k)
            continue
        if 'x' not in objs:
            wrong.append(k)
            continue

        integerproblem = all(
            [float(pair[0]).is_integer() for pair in numlist if pair[0] != 'x'])
        if VERBOSE:
            print(objs, numlist, [v.num for k, v in sets])

        numidxlist = [pair[0] for pair in numlist]
        ST = StringTemplate(numidxlist, inf=True)

        scores = []
        for eq in ST.equations:
            eq_text = eq.toString()
            good = (len(constraints) == 0
                    or any(constraint in eq_text for constraint in constraints))
            if not good:
                scores.append(-0.2)
                continue

            thisscore = []
            # determine score for this eq
            l, r = [side.strip().split(' ') for side in eq_text.split('=')]
            if len(r) > 1 and len(l) > 1:
                scores.append(-0.2)
                continue
            if len(r) > 1:
                compound = r
                target = l[0]
            else:
                compound = l
                target = r[0]
            target = (target, objs[target])

            # find innermost parens?
            while len(compound) > 1:
                if "(" in compound:
                    rpidx = (len(compound) - 1) - compound[::-1].index('(')
                    lpidx = rpidx + compound[rpidx:].index(")")
                    subeq = compound[rpidx + 1:lpidx]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = (compound[:rpidx] + [substr]
                                + compound[lpidx + 1:])
                else:
                    subeq = compound[0:3]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = [substr] + compound[3:]

                if substr in objs:
                    # Sub-expression already scored for an earlier equation.
                    pute = objs[substr]
                else:
                    p, op, e = subeq
                    p = objs[p][1]
                    e = objs[e][1]
                    op = op.strip()
                    pute = compute(p, op, e, target, problem)
                    objs[substr] = pute
                if pute == -1:
                    exit()
                score, c = pute
                thisscore.append(score)

            if len(thisscore) == 0:
                scores.append(0)
            else:
                scores.append(sum(thisscore) / float(len(thisscore)))

        srt = sorted([(x, i) for i, x in enumerate(scores)], reverse=True)
        print('\n Top scoring 3 equations: ')
        for x, i in srt[:3]:
            print(x, ST.equations[i].toString())

        eqidxs = [
            y[0] for y in sorted(
                enumerate(scores), key=lambda x: x[1], reverse=True)
        ]
        seen = []
        tright = 0
        for i in eqidxs:
            # Stop after the first accepted guess.
            if len(seen) >= 1:
                break
            eq = ST.equations[i].toString()
            splitEquation = eq.split('=')
            eq = splitEquation[0] + '- (' + splitEquation[1] + ')'
            try:
                guess = solve(eq, 'x')[0]
            except Exception:
                # Best effort: an equation the solver cannot handle (or that
                # has no solution) is treated like a negative guess below.
                guess = -1

            # This is the non-negative constraint
            # wrapped in a "check for complex number" try statement :/
            try:
                if guess < 0:
                    continue
            except Exception:
                continue

            # this is a constraint against fractional answers when the
            # problem is integers
            # NOTE(review): ``is_integer`` is read as an attribute, not
            # called; presumably ``guess`` is a solver object exposing it as
            # a property - confirm.
            if not guess.is_integer:
                if integerproblem:
                    continue

            if guess not in seen:
                seen.append(guess)
            else:
                continue

            answ = float(answs[k])
            ops = [
                x for x in ST.equations[i].toString()
                if x in ['+', '-', '*', '/']
            ]
            if guess == answ:
                print("\nCORRECT")
                tright = 1
                ar[0] += ops.count('+')
                sr[0] += ops.count('-')
                mr[0] += ops.count('*')
                dr[0] += ops.count('/')
            else:
                print("\nINCORRECT")
                ar[1] += ops.count('+')
                sr[1] += ops.count('-')
                mr[1] += ops.count('*')
                dr[1] += ops.count('/')
            print("Guessed Equation : ", ST.equations[i].toString())
            print("Guess : ", guess, "\nTrue Answer :", answ, '\n\n')

        guesses += len(seen)
        if tright == 1:
            if multi:
                multiopsright += 1
            right += 1
        else:
            wrong.append(k)
        if VERBOSE:
            input()

    print(right, guesses)
    print(multiops, multiopsright)
    print(ar, sr, mr, dr)
def make_eq(q,a,equations):
    """Build training rows from candidate equations and write them to disk.

    For every problem the candidates from ``get_k_eqs`` are balanced between
    label 1 and label 0, and the first usable equation of each label is
    reduced with ``compute`` and turned into a row by ``training``.  Rows are
    written to ``data/<c>.global.data`` where ``<c>`` is the last character
    of ``sys.argv[1]``; each row is ``v[0]`` followed by 1-indexed
    ``index:value`` pairs.

    Args:
        q: Indexable collection of problem texts.
        a: Unused; kept for interface compatibility.
        equations: Per-problem identifiers handed to ``get_k_eqs``.
    """
    non_operands = ('(', ')', '+', '-', '/', '*', '=')
    tdata = []
    for k in range(len(q)):
        answers = get_k_eqs(equations[k], g=True)
        good = list(set([x for x in answers if x[0] == 1]))
        # Keep at most as many 0-labelled candidates as 1-labelled ones.
        bad = list(set([x for x in answers if x[0] == 0]))[:len(good)]
        answers = list(set(good + bad))
        if not answers:
            continue

        # First preprocessing, tokenize slightly
        words = q[k].strip().split(" ")
        for pos, word in enumerate(words):
            if word and word[-1] in (',', '.', '?'):
                words[pos] = word[:-1] + " " + word[-1]
        problem = " " + ' '.join(words) + " "
        print(problem)

        # make story
        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])

        # TODO look for 2 xes
        if not [entry for entry in sets if entry[1].num == 'x']:
            print("NO X WHY")
            continue

        numlist = [(cleannum(s.num), s) for _, s in sets]
        numlist = [pair for pair in numlist if pair[0] != '']
        objs = {num: (0, s) for num, s in numlist}
        nums_in_order = [pair[0] for pair in numlist]
        print(numlist)

        # Every operand of the first candidate must be a known quantity.
        consts = [t for t in answers[0][1].split(" ") if t not in non_operands]
        print(consts)
        present = [t for t in consts if t in objs]
        if present != consts:
            print(present, consts)
            print("missing thing")
            continue

        print(answers)
        used_good = False
        used_bad = False
        for j, eq, cons in answers:
            eqspl = eq.split(" = ")
            operands = [t for t in eq.split(" ") if t not in non_operands]
            order = int(operands == nums_in_order)
            if order == 0:
                # An equation with a bare "x" on one side gets a second
                # chance with its two sides swapped.
                if eqspl[0].strip() == 'x' or eqspl[1].strip() == 'x':
                    eq2 = eqspl[1] + " = " + eqspl[0]
                    operands = [
                        t for t in eq2.split(" ") if t not in non_operands
                    ]
                    order = int(operands == nums_in_order)
                    eq = eq2
            if order == 0:
                continue

            # Use only the first usable equation of each label per problem.
            if j == 1:
                if used_good:
                    continue
                used_good = True
            if j == 0:
                if used_bad:
                    continue
                used_bad = True

            print(j, eq)
            l, r = [side.strip().split(' ') for side in eq.split('=')]
            target = ('x', objs['x'])

            # Reduce each side to a single token, innermost group first.
            sides = []
            thisscore = []
            for compound in (l, r):
                while len(compound) > 1:
                    if "(" in compound:
                        rpidx = (len(compound) - 1) - compound[::-1].index('(')
                        lpidx = rpidx + compound[rpidx:].index(")")
                        subeq = compound[rpidx + 1:lpidx]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = (compound[:rpidx] + [substr]
                                    + compound[lpidx + 1:])
                    else:
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    p, op, e = subeq
                    pute = compute(objs[p], op.strip(), objs[e], target,
                                   problem, story, order)
                    objs[substr] = pute
                    if pute == -1:
                        exit()
                    score, c, vals = pute
                    thisscore.append(score)
                sides.append(objs[compound[0]])

            score = 1
            for s in thisscore:
                score *= s
            tdata.append(
                training(sides[0], sides[1], problem, story, target, j, order,
                         score, cons, eq))

    # The context manager guarantees the rows are flushed and the handle is
    # closed even if a write fails part-way.
    with open("data/" + sys.argv[1][-1] + ".global.data", 'w') as f:
        for v in tdata:
            f.write(str(v[0]) + " ")
            for i, j in enumerate(v[1:]):
                f.write(str(i + 1) + ":" + str(j) + " ")
            f.write("\n")