Beispiel #1
0
def compute(p, op, e, target, problem, story, order, score=None, cons=None):
    """Score applying ``op`` to the pair ``(p, e)`` and build the combined set.

    For ``'='`` the feature vector is ``[order, score, cons]`` followed by the
    output of ``makesets.vector`` and is scored with the ``glob`` model; every
    other operator uses the plain ``makesets.vector`` features with the
    ``multi`` model.

    Args:
        p, e: Operand entries; element ``1`` of each is passed to
            ``makesets.combine``.
        op: One of ``'+'``, ``'-'``, ``'*'``, ``'/'`` or ``'='``.
        target, problem, story: Forwarded unchanged to ``makesets.vector``.
        order, score, cons: Extra leading features, used only for ``'='``.

    Returns:
        A tuple ``(val, c, op_val)`` where ``op_val`` is the first row of the
        third value returned by ``svm_predict``, ``val`` is the entry of that
        row selected for ``op`` and ``c`` is
        ``makesets.combine(p[1], e[1], op)``.

    Raises:
        ValueError: If ``op`` is not a supported operator. (Previously this
            surfaced as an unbound ``val`` only after the prediction had run.)
    """
    # Column of the prediction row that is read for each operator.
    column = {'+': 0, '-': 1, '*': 2, '/': 3, '=': 0}
    if op not in column:
        raise ValueError("unsupported operator: %r" % (op,))

    if op == '=':
        vec = [order, score, cons]
        vec.extend(makesets.vector(p, e, problem, story, target))
        model = glob
    else:
        vec = makesets.vector(p, e, problem, story, target)
        model = multi

    _, _, op_val = svmutils.svm_predict([-1], [vec], model, '-q -b 1')
    op_val = op_val[0]
    val = op_val[column[op]]

    c = makesets.combine(p[1], e[1], op)
    return (val, c, op_val)
Beispiel #2
0
def dotrain():
    """Turn solved equations into operator training triples and pickle them.

    The file named by ``sys.argv[1]`` is opened, each equation in ``answers``
    is reduced to ``(op, left, right)`` triples, the triples are passed to
    ``training`` and the resulting examples are accumulated in
    ``bigtexamples``, which is finally pickled to ``data/training.pickle``.

    NOTE(review): this looks like an unfinished draft. ``answers``, ``objs``,
    ``problem`` and ``bigtexamples`` are never bound inside this function, and
    the file-reading preamble cannot run as written (see the notes below).
    """
    fn = sys.argv[1]
    with open(fn) as f:
        # NOTE(review): ``f`` is the file object returned by ``open``; file
        # objects have no ``split`` method, so this raises AttributeError.
        # Presumably ``f.read().split("___")`` was intended -- confirm.
        f = f.split("___")
        for c in f:
            # NOTE(review): a bare tuple expression, not an assignment; it only
            # evaluates the names p, a, t, n. Looks like a placeholder for
            # unpacking one "___"-separated record -- confirm.
            p,a,t,n,

        for j,eq in enumerate(answers):
            trips = []
            print(j,eq)
            # Split the equation at '=' into two space-separated token lists.
            l,r = [x.strip().split(' ') for x in eq.split('=')]
            
            # NOTE(review): when both sides hold a single token, both names
            # below refer to ``l``.
            compound = l if len(r)==1 else r
            simplex = l if len(l)==1 else r
            target = simplex[0]
            target = (target,objs[target])

            #find innermost parens?
            # Collapse ``compound`` one binary operation at a time until a
            # single token is left.
            while len(compound)>1:
                if "(" in compound:
                    # rpidx: position of the last "(" token; lpidx: position of
                    # the first ")" token at or after it.
                    rpidx = (len(compound) - 1) - compound[::-1].index('(')
                    lpidx = rpidx+compound[rpidx:].index(")")
                    subeq = compound[rpidx+1:lpidx]
                    substr = "("+''.join(subeq)+")"
                    compound = compound[:rpidx]+[substr]+compound[lpidx+1:]
                else:
                    # No parentheses: take the first three tokens.
                    subeq = compound[0:3]
                    substr = "("+''.join(subeq)+")"
                    compound = [substr]+compound[3:]
                if True:
                    p,op,e = subeq
                    #print(p,op,e)
                    p = objs[p]
                    e = objs[e]
                    op = op.strip()
                    trips.append((op,p,e))
                    pute = (0,makesets.combine(p[1],e[1],op))
                    #print("OPERATION SELECTED: ",op)
                    #p.details()
                    #e.details()
                    #print(substr,pute[1].num)
                    # Register the collapsed sub-expression under its string
                    # form so that later steps can look it up like an operand.
                    objs[substr]=pute
                # ``pute`` is always the 2-tuple built above, so this
                # comparison is never true.
                if pute == -1:
                    exit()
            # Final "=" triple, keeping left-hand side first.
            if simplex == l:
                trips.append(("=",objs[simplex[0]],objs[compound[0]]))
            else:
                trips.append(("=",objs[compound[0]],objs[simplex[0]]))
            t = training(trips,problem,target)
            for op in t:
                bigtexamples[op][0].extend(t[op][0])
                bigtexamples[op][1].extend(t[op][1])
            # Runs after the loop, so it reports only the last ``op`` value.
            print(op,len(bigtexamples[op][0]))
    # NOTE(review): the output file object is never closed explicitly.
    pickle.dump(bigtexamples,open('data/training.pickle','wb'))
Beispiel #3
0
def compute(p, op, e, target, problem, story, order):
    """Score applying ``op`` to the pair ``(p, e)`` and build the combined set.

    Args:
        p, e: Operand entries; element ``1`` of each is passed to
            ``makesets.combine``.
        op: One of ``'+'``, ``'-'``, ``'*'``, ``'/'`` or ``'='``.
        target, problem, story: Forwarded unchanged to ``makesets.vector``.
        order: Accepted for signature compatibility; not used here.

    Returns:
        A tuple ``(val, c, op_val)`` where ``op_val`` is the first row of the
        third value returned by ``svm_predict`` for the ``multi`` model,
        ``val`` is the entry of that row selected for ``op`` and ``c`` is
        ``makesets.combine(p[1], e[1], op)``.

    Raises:
        ValueError: If ``op`` is not a supported operator. (Previously this
            surfaced as an unbound ``val`` only after the prediction had run.)
    """
    # Column of the prediction row that is read for each operator.
    column = {'+': 0, '-': 1, '*': 2, '/': 3, '=': 1}
    if op not in column:
        raise ValueError("unsupported operator: %r" % (op,))

    vec = makesets.vector(p, e, problem, story, target)
    _, _, op_val = svmutil.svm_predict([-1], [vec], multi, '-q -b 1')
    op_val = op_val[0]
    val = op_val[column[op]]

    c = makesets.combine(p[1], e[1], op)
    return (val, c, op_val)
Beispiel #4
0
def compute(p, op, e, target, problem):
    """Score applying ``op`` to ``p`` and ``e`` and build the combined set.

    Args:
        p, e: Operands; passed to ``makesets.vector`` wrapped as ``(0, p)``
            and ``(0, e)``, and to ``makesets.combine`` as they are.
        op: One of ``'+'``, ``'-'``, ``'*'`` or ``'/'``.
        target, problem: Forwarded unchanged to ``makesets.vector``.

    Returns:
        A tuple ``(val, c)`` where ``val`` is the entry selected for ``op``
        from the first row of the third value returned by ``svm_predict`` for
        the ``multi`` model, and ``c`` is ``makesets.combine(p, e, op)``.

    Raises:
        ValueError: If ``op`` is not a supported operator. (Previously this
            surfaced as an unbound ``val`` only after the prediction had run.)
    """
    # Column of the prediction row that is read for each operator.
    column = {'+': 0, '-': 1, '*': 2, '/': 3}
    if op not in column:
        raise ValueError("unsupported operator: %r" % (op,))

    vec = makesets.vector((0, p), (0, e), problem, target)
    _, _, op_val = svm_predict([-1], [vec], multi, '-q -b 1')
    val = op_val[0][column[op]]

    c = makesets.combine(p, e, op)
    return (val, c)
Beispiel #5
0
def compute(p,op,e,target,problem):
    """Score applying ``op`` to ``p`` and ``e`` and build the combined set.

    Args:
        p, e: Operands; passed to ``makesets.vector`` wrapped as ``(0, p)``
            and ``(0, e)``, and to ``makesets.combine`` as they are.
        op: One of ``'+'``, ``'-'``, ``'*'`` or ``'/'``.
        target, problem: Forwarded unchanged to ``makesets.vector``.

    Returns:
        A tuple ``(val, c)`` where ``val`` is the entry selected for ``op``
        from the first row of the third value returned by ``svm_predict`` for
        ``model``, and ``c`` is ``makesets.combine(p, e, op)``.

    Raises:
        ValueError: If ``op`` is not a supported operator. (Previously this
            surfaced as an unbound ``val`` only after the prediction had run.)
    """
    # Column of the prediction row that is read for each operator.
    column = {'+': 0, '-': 1, '*': 2, '/': 3}
    if op not in column:
        raise ValueError("unsupported operator: %r" % (op,))

    vec = makesets.vector((0, p), (0, e), problem, target)
    _, _, op_val = svm_predict([-1], [vec], model, '-q -b 1')
    val = op_val[0][column[op]]

    c = makesets.combine(p, e, op)
    return (val, c)
Beispiel #6
0
def compute(p,op,e,target,problem):
    """Score applying ``op`` to ``p`` and ``e`` and build the combined set.

    Args:
        p, e: Operands; passed to ``makesets.vector`` wrapped as ``(0, p)``
            and ``(0, e)``, and to ``makesets.combine`` as they are.
        op: One of ``'+'``, ``'-'``, ``'*'`` or ``'/'``.
        target, problem: Forwarded unchanged to ``makesets.vector``.

    Returns:
        A tuple ``(val, c)`` where ``val`` is the entry selected for ``op``
        from the first row of the third value returned by ``svm_predict`` for
        the ``multi`` model, and ``c`` is ``makesets.combine(p, e, op)``.

    Raises:
        ValueError: If ``op`` is not a supported operator. (Previously this
            surfaced as an unbound ``val`` only after the prediction had run.)
    """
    # Which column of the prediction row belongs to which operator.
    op_column = {'+': 0, '-': 1, '*': 2, '/': 3}
    try:
        idx = op_column[op]
    except KeyError:
        raise ValueError("unsupported operator: %r" % (op,))

    vec = makesets.vector((0, p), (0, e), problem, target)
    _, _, op_val = svm_predict([-1], [vec], multi, '-q -b 1')
    val = op_val[0][idx]

    c = makesets.combine(p, e, op)
    return (val, c)
Beispiel #7
0
def dotrain():
    """Generate operator training examples from word problems and pickle them.

    Problems and answers are read line by line from ``sys.argv[1]`` and
    ``sys.argv[2]``, or from ``emnlp_noIrrelev_p.txt`` /
    ``emnlp_noIrrelev_a.txt`` when no command-line arguments are given.
    For every problem the number sets are extracted, ``Solver`` proposes
    equations that yield the expected answer, and each kept equation is
    reduced to ``(op, left, right)`` triples that are passed to ``training``.

    Side effects:
        * Skipped problems are logged to ``nogoodtrainproblems``.
        * The accumulated examples are pickled to
          ``data/dev_training.pickle``.
        * Progress is printed to stdout.
    """
    if len(sys.argv) > 1:
        problem_path, answer_path = sys.argv[1], sys.argv[2]
    else:
        problem_path = "emnlp_noIrrelev_p.txt"
        answer_path = "emnlp_noIrrelev_a.txt"
    with open(problem_path) as fh:
        wps = fh.readlines()
    with open(answer_path) as fh:
        answs = fh.readlines()

    bigtexamples = {x: ([], []) for x in ["+", "*", '/', '-', '=']}
    replacements = {' two ':' 2 '," three ":' 3 ',' four ':' 4 ',' five ':' 5 ',' six ':' 6 ',' seven ':' 7 ',' eight ':' 8 ',' nine ':' 9 ',' ten ':' 10 ',' eleven ':' 11 ',' week ':' 7 days ',' dozen ':' 12 of ', ' dozens ': ' 12 ', ' twice ':' 2 '}
    # Tokens of an equation string that are not operands.
    syntax_tokens = ['+', '-', '/', '=', ')', '(', '*']

    with open('nogoodtrainproblems', 'w') as problematic:
        for k in range(len(wps)):
            print(k)
            problem = wps[k].lower()
            for r in replacements:
                problem = problem.replace(r, replacements[r])
            story = nlp.parse(problem)
            numbs = makesets.makesets(story['sentences'])

            # Comprehension variables are named so that they never shadow the
            # loop index ``k`` used for ``answs[k]`` below.
            numlist = [(cleannum(v.num), v) for _, v in numbs]
            numlist = [x for x in numlist if x[0] != '']

            allnumbs = {str(num): v for num, v in numlist}
            if 'x' not in allnumbs and 'x*' not in allnumbs:
                problematic.write('no x :' + problem)
                continue

            objs = {num: (0, v) for num, v in numlist}

            print('start solving')
            print(numlist)
            if len(numlist) < 2:
                problematic.write("not enough numbers : " + problem)
                continue

            ST = Solver([x[0] for x in numlist if x[0] != 'x'])
            answers = ST.solveEquations(float(answs[k]))
            print('done solving')
            if not answers:
                continue

            # Keep only equations with a single token on one side of "=".
            simpleranswers = [x for x in answers if x.split(" ")[1] == '=' or x.split(" ")[-2] == "="]
            if simpleranswers:
                answers = simpleranswers
            else:
                print(answers)
                problematic.write("not simple : " + problem)
                continue

            # Prefer equations in which "x" occupies the same operand position
            # as it does in the text order of the numbers.
            answervals = [x for x in answers[0].split(" ") if x not in syntax_tokens]
            numvals = [x[0] for x in numlist if x[0] in answervals]
            xidx = numvals.index("x")
            xrightanswers = [
                eq for eq in answers
                if [z for z in eq.split(" ") if z not in syntax_tokens].index('x') == xidx
            ]
            if xrightanswers:
                answers = xrightanswers

            for j, eq in enumerate(answers):
                trips = []
                print(j, eq)
                l, r = [x.strip().split(' ') for x in eq.split('=')]

                # The single-token side is the "simplex", the other side is
                # the "compound". The old selection picked ``l`` for both when
                # each side held one token, producing an "=" triple that
                # paired a value with itself.
                compound = r if len(l) == 1 else l
                simplex = l if len(l) == 1 else r
                target = simplex[0]
                target = (target, objs[target])

                # Collapse the compound side one binary operation at a time.
                while len(compound) > 1:
                    if "(" in compound:
                        # Reduce the group opened by the last "(" token first.
                        open_idx = (len(compound) - 1) - compound[::-1].index('(')
                        close_idx = open_idx + compound[open_idx:].index(")")
                        subeq = compound[open_idx + 1:close_idx]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = compound[:open_idx] + [substr] + compound[close_idx + 1:]
                    else:
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    p, op, e = subeq
                    p = objs[p]
                    e = objs[e]
                    op = op.strip()
                    trips.append((op, p, e))
                    # Register the collapsed sub-expression so later steps can
                    # look it up like any other operand.
                    objs[substr] = (0, makesets.combine(p[1], e[1], op))

                # Final "=" triple, keeping the left-hand side first.
                if len(l) == 1:
                    trips.append(("=", objs[simplex[0]], objs[compound[0]]))
                else:
                    trips.append(("=", objs[compound[0]], objs[simplex[0]]))
                t = training(trips, problem, target)
                for op in t:
                    bigtexamples[op][0].extend(t[op][0])
                    bigtexamples[op][1].extend(t[op][1])
                print(op, len(bigtexamples[op][0]))

    with open('data/dev_training.pickle', 'wb') as fh:
        pickle.dump(bigtexamples, fh)
Beispiel #8
0
def make_eq(q, a, equations):
    """Build operator training examples from pre-parsed problems.

    Args:
        q: Sequence of problem texts.
        a: Accepted for signature compatibility; not used here.
        equations: Per-problem identifiers, passed to ``read_parse`` (as
            ``int``), ``get_k_eqs`` and ``read_sets``.

    Side effects:
        Pickles the accumulated examples to
        ``"data/" + sys.argv[1][-1] + ".local.training"`` and prints progress
        to stdout.
    """
    bigtexamples = {x: ([], []) for x in ["+", "*", "/", "-", "="]}
    wps = q
    # Tokens of an equation string that are not operands.
    syntax_tokens = ["(", ")", "+", "-", "/", "*", "="]

    for k in range(len(wps)):

        # First preprocessing: detach a trailing , . or ? from each token.
        problem = wps[k].strip().split(" ")
        for i, x in enumerate(problem):
            if len(x) == 0:
                continue
            if x[-1] in [",", ".", "?"]:
                problem[i] = x[:-1] + " " + x[-1]
        problem = " " + " ".join(problem) + " "
        print(k)
        print(problem)

        story = read_parse(int(equations[k]))
        eqs = get_k_eqs(equations[k])
        answers = [x[1] for x in eqs if x[0] == 1]
        if not answers:
            continue
        answers = list(set(answers))
        print(answers)

        sets = read_sets(equations[k])

        if not [s for s in sets if s[1].num == "x"]:
            print("NO X WHY")
            continue

        numlist = [(cleannum(v.num), v) for _, v in sets]
        numlist = [x for x in numlist if x[0] != ""]
        objs = {num: (0, v) for num, v in numlist}
        print(objs.items())

        # Every operand of the first equation must be a known number set.
        consts = [x for x in answers[0].split(" ") if x not in syntax_tokens]
        present = [x for x in consts if x in objs]
        if present != consts:
            print(present, consts)
            print("missing thing")
            continue

        # Keep only equations whose operands appear in the same order as the
        # extracted numbers.
        text_order = [x[0] for x in numlist]
        oanswers = [
            eq for eq in answers
            if [x for x in eq.split(" ") if x not in syntax_tokens] == text_order
        ]
        if not oanswers:
            continue

        answers = oanswers
        print(answers)

        # Prefer equations ending in "= <token>"; otherwise pick one at random.
        simpleanswers = [x for x in answers if x.split(" ")[-2] == "="]
        if simpleanswers:
            answers = simpleanswers
        else:
            answers = [answers[randint(0, len(answers) - 1)]]
        print(answers)

        for j, eq in enumerate(answers):
            trips = []
            print(j, eq)
            l, r = [x.strip().split(" ") for x in eq.split("=")]

            target = "x"
            target = (target, objs[target])

            # Collapse each side one binary operation at a time.
            for compound in (l, r):
                while len(compound) > 1:
                    if "(" in compound:
                        # Reduce the group opened by the last "(" token first.
                        open_idx = (len(compound) - 1) - compound[::-1].index("(")
                        close_idx = open_idx + compound[open_idx:].index(")")
                        subeq = compound[open_idx + 1 : close_idx]
                        substr = "(" + "".join(subeq) + ")"
                        compound = compound[:open_idx] + [substr] + compound[close_idx + 1 :]
                    else:
                        subeq = compound[0:3]
                        substr = "(" + "".join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    p, op, e = subeq
                    p = objs[p]
                    e = objs[e]
                    op = op.strip()
                    trips.append((op, p, e))
                    # Register the collapsed sub-expression so later steps can
                    # look it up like any other operand.
                    objs[substr] = (0, makesets.combine(p[1], e[1], op))
            t = training(trips, problem, story, target)
            for op in t:
                bigtexamples[op][0].extend(t[op][0])
                bigtexamples[op][1].extend(t[op][1])

    # Close the output file deterministically instead of relying on GC.
    with open("data/" + sys.argv[1][-1] + ".local.training", "wb") as fh:
        pickle.dump(bigtexamples, fh)
Beispiel #9
0
def make_eq(q, a, eqs, VERBOSE, TRAIN):
    """Build operator training examples from problems with given equations.

    Args:
        q: Problems, indexed as ``q[k]`` with each item lowercased as text.
        a: Accepted for signature compatibility; not used here.
        eqs: One equation string per problem; entries that strip to
            ``"None"`` are skipped.
        VERBOSE: When true, all problems are listed and the index to process
            is read from stdin on every iteration; number-set details are
            shown and a key press is awaited.
        TRAIN: When true, the accumulated examples are pickled to
            ``'data/' + OUT + ".local.training"``.

    Side effects:
        Problems without an ``x`` set are logged to
        ``somethingWrongProblems``; progress is printed to stdout.
    """
    bigtexamples = {x: ([], []) for x in ["+", "*", '/', '-', '=']}
    wps = q
    if not TRAIN and not VERBOSE:
        # Nothing is ever written to this file here; opening it only creates
        # or truncates it, so close the handle right away.
        # NOTE(review): ``q`` is indexed like a list of problems below, in
        # which case this concatenation cannot succeed -- confirm what
        # callers pass.
        open(q + ".out.txt", 'w').close()

    replacements = {
        ' two ': ' 2 ',
        " three ": ' 3 ',
        ' four ': ' 4 ',
        ' five ': ' 5 ',
        ' six ': ' 6 ',
        ' seven ': ' 7 ',
        ' eight ': ' 8 ',
        ' nine ': ' 9 ',
        ' ten ': ' 10 ',
        ' eleven ': ' 11 ',
        ' twice ': ' 2 ',
    }

    with open('somethingWrongProblems', 'w') as problematic:
        for k in range(len(wps)):
            print(eqs[k])
            if eqs[k].strip() == "None":
                continue
            answers = [eqs[k]]
            if VERBOSE:
                for i in range(len(wps)):
                    print(i, wps[i])
                # NOTE(review): ``answers`` was taken from the loop index
                # above, while the problem text below uses the index typed
                # in here -- confirm this is intended.
                k = int(input())
            print(k)

            # First preprocessing: detach a trailing , . or ? from each token.
            problem = wps[k].lower().strip().split(" ")
            for i, x in enumerate(problem):
                if len(x) == 0:
                    continue
                if x[-1] in [',', '.', '?']:
                    problem[i] = x[:-1] + " " + x[-1]
            problem = " " + ' '.join(problem) + " "
            print(problem)

            for r in replacements:
                problem = problem.replace(r, replacements[r])

            # make story
            story = nlp.parse(problem)
            sets = makesets.makesets(story['sentences'])
            print(sets)

            # Drop sets that share an ``idx``, preferring one whose ``num``
            # contains a digit.
            i = 0
            while i < len(sets):
                dups = [
                    y for y in sets
                    if y[1].idx is not None and y[1].idx == sets[i][1].idx
                ]
                if len(dups) > 1:
                    good = [
                        y for y in dups
                        if any(ch.isdigit() for ch in y[1].num)
                    ]
                    if good:
                        for other in [x for x in dups if x != good[0]]:
                            sets.remove(other)
                    else:
                        # just pick 1
                        for other in dups[1:]:
                            sets.remove(other)
                i += 1

            # TODO look for 2 xes
            if not [s for s in sets if s[1].num == 'x']:
                problematic.write('no x :' + problem)
                continue

            numlist = [(cleannum(v.num), v) for _, v in sets]
            numlist = [x for x in numlist if x[0] != '']
            if VERBOSE:
                for _, v in numlist:
                    v.details()
                input()

            objs = {num: (0, v) for num, v in numlist}

            for j, eq in enumerate(answers):
                trips = []
                print(j, eq)
                l, r = [x.strip().split(' ') for x in eq.split('=')]

                target = 'x'
                target = (target, objs[target])

                # Collapse each side one binary operation at a time.
                for compound in (l, r):
                    while len(compound) > 1:
                        if "(" in compound:
                            # Reduce the group opened by the last "(" first.
                            open_idx = (len(compound) - 1) - compound[::-1].index('(')
                            close_idx = open_idx + compound[open_idx:].index(")")
                            subeq = compound[open_idx + 1:close_idx]
                            substr = "(" + ''.join(subeq) + ")"
                            compound = (compound[:open_idx] + [substr]
                                        + compound[close_idx + 1:])
                        else:
                            subeq = compound[0:3]
                            substr = "(" + ''.join(subeq) + ")"
                            compound = [substr] + compound[3:]
                        p, op, e = subeq
                        p = objs[p]
                        e = objs[e]
                        op = op.strip()
                        trips.append((op, p, e))
                        # Register the collapsed sub-expression so later
                        # steps can look it up like any other operand.
                        objs[substr] = (0, makesets.combine(p[1], e[1], op))
                t = training(trips, problem, story, target)
                for op in t:
                    bigtexamples[op][0].extend(t[op][0])
                    bigtexamples[op][1].extend(t[op][1])
                print(op, len(bigtexamples[op][0]))

    if TRAIN:
        # Close the output file deterministically instead of relying on GC.
        with open('data/' + OUT + ".local.training", 'wb') as fh:
            pickle.dump(bigtexamples, fh)
Beispiel #10
0
def make_eq(q, a, VERBOSE, TRAIN):
    """Search equations for each word problem and optionally build training data.

    Args:
        q: Path of the problems file; read with ``readlines``.
        a: Path of the answers file; read with ``readlines``.
        VERBOSE: When true, all problems are listed and the index to process
            is read from stdin on every iteration; the function also waits
            for input at two further points.
        TRAIN: When true, one equation per problem is turned into operator
            triples for ``training`` and the accumulated examples are pickled
            to ``'data/' + OUT + ".training"``. When false, processing of a
            problem stops after the candidate equations have been reported.

    Side effects:
        * Rejected problems are logged to ``somethingWrongProblems``.
        * When neither ``TRAIN`` nor ``VERBOSE`` is set, the problem, answer,
          number list and candidate equations are written to
          ``q + ".out.txt"``, each record terminated by ``___``.
        * Progress is printed to stdout.

    NOTE(review): none of the files opened here is closed explicitly.
    """
    bigtexamples = {x: ([], []) for x in ["+", "*", '/', '-', '=']}
    wps = open(q).readlines()
    answs = open(a).readlines()
    if not TRAIN and not VERBOSE:
        out = open(q + ".out.txt", 'w')
    problematic = open('somethingWrongProblems', 'w')

    # Space-delimited number words and their replacement text.
    replacements = {
        ' two ': ' 2 ',
        " three ": ' 3 ',
        ' four ': ' 4 ',
        ' five ': ' 5 ',
        ' six ': ' 6 ',
        ' seven ': ' 7 ',
        ' eight ': ' 8 ',
        ' nine ': ' 9 ',
        ' ten ': ' 10 ',
        ' eleven ': ' 11 ',
        ' twice ': ' 2 '
    }

    for k in range(len(wps)):
        if VERBOSE:
            # Interactive mode: the typed index replaces the loop index.
            for i in range(len(wps)):
                print(i, wps[i])
            k = int(input())
        print(k)
        problem = wps[k].lower()
        #First preprocessing, tokenize slightly
        # Detach a trailing , . or ? from each token.
        problem = problem.strip().split(" ")
        for i, x in enumerate(problem):
            if len(x) == 0: continue
            if x[-1] in [',', '.', '?']:
                problem[i] = x[:-1] + " " + x[-1]
        problem = ' '.join(problem)
        problem = " " + problem + " "
        print(problem)

        for r in replacements:
            problem = problem.replace(r, replacements[r])
        # The string literal below is an unused expression statement holding
        # an alternative replacement strategy; it has no effect.
        '''
        if " how " in problem:
            left,right = problem.split(" how ")
        else: left = problem

        for r in replacements:
            left = left.replace(r,replacements[r])
        if " how " in problem:
            problem = left + ' how ' + right
        else:
            problem = left
        '''

        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        i = 0
        print(sets)
        # Drop sets that share an ``idx``, preferring one whose ``num``
        # contains a digit.
        while i < len(sets):
            dups = [y for y in sets if y[1].idx != None]
            dups = [y for y in dups if y[1].idx == sets[i][1].idx]
            if len(dups) > 1:
                good = [
                    y for y in dups
                    if len([x for x in y[1].num if x.isdigit()]) > 0
                ]
                if good:
                    others = [x for x in dups if x != good[0]]
                    for x in others:
                        sets.remove(x)
                else:
                    # just pick 1
                    for x in dups[1:]:
                        sets.remove(x)
            i += 1

        xidx = [i for i, x in enumerate(sets) if x[1].num == 'x']
        if not xidx:
            problematic.write('no x :' + problem)
            continue

        #TODO look for 2 xes
        xidx = xidx[0]
        # ``twoToRight`` is set when the set before ``x``, the set after it,
        # or (near the end of the list) the ``x`` set itself has the entity
        # 'dozen'.
        twoToRight = False
        if xidx > 0:
            print(len(sets), xidx)
            if sets[xidx - 1][1].entity == 'dozen':
                # 2 vals to right
                twoToRight = True
        if len(sets) - xidx > 1:
            if sets[xidx + 1][1].entity == 'dozen':
                twoToRight = True
        if len(sets) - xidx < 3:
            if sets[xidx][1].entity == 'dozen':
                twoToRight = True

        numlist = [(cleannum(v.num), v) for k, v in sets]
        numlist = [x for x in numlist if x[0] != '']
        if VERBOSE:
            for z, v in numlist:
                v.details()
            input()

        # ``allnumbs`` is not read again in this function.
        allnumbs = {str(k): v for k, v in numlist}

        objs = {k: (0, v) for k, v in numlist}

        print('start solving')
        print(numlist)
        if len(numlist) < 2:
            problematic.write("not enough numbers : " + problem)
            continue

        values = [x[0] for x in numlist if x[0] != 'x']
        print(values)
        ST = Solver(values)

        answers = []
        answers = ST.solveEquations(float(answs[k]))
        print(answs[k])
        if not answers:
            problematic.write("No answers : " + problem + "\n")
            problematic.write(str([x[0] for x in numlist]) + '\n')
            problematic.write(answs[k] + '\n')
            continue
        print('done solving')

        # if target has 2 entities, try eqs with = x op y format
        simpleranswers = None
        if twoToRight:
            # NOTE(review): the bare ``except`` swallows every error raised
            # while filtering; presumably only the IndexError from equations
            # with fewer than four tokens is expected -- confirm.
            try:
                simpleranswers = [
                    x for x in answers if x.split(" ")[-4] == "=" and (
                        x.split(" ")[-3] == 'x' or x.split(' ')[-1] == 'x')
                ]
            except:
                pass
        if not simpleranswers:
            # Fall back to equations with a single token on one side of "=".
            simpleranswers = [
                x for x in answers
                if x.split(" ")[1] == '=' or x.split(" ")[-2] == "="
            ]
        #simpleranswers = [x for x in answers if x.split(" ")[1] == '=' or x.split(" ")[-2]=="="]

        #filter out where = in middle if simpler eq exists
        if simpleranswers:
            print(answers)
            # Work on a copy so entries can be removed while the loop below
            # iterates over ``simpleranswers``.
            answers = simpleranswers[:]
        else:
            problematic.write("not simple : " + problem + "\n")
            continue

        values = [x[0] for x in numlist]
        xidx = values.index('x')
        print(simpleranswers)
        print(xidx)
        # Drop equations in which "x" is not at the same operand position as
        # in ``values``.
        for a in simpleranswers:
            aspl = [
                x for x in a.split(" ")
                if x not in ["/", "-", '+', '*', '=', '(', ')']
            ]
            print(a)
            print(aspl)
            print(values)
            aidx = aspl.index('x')
            print(aidx)
            if aidx != xidx:
                print("removing ", a)
                answers.remove(a)
        print(answers)
        # If every candidate was removed, keep them all instead.
        if answers == []:
            answers = simpleranswers

        print(answers)
        if not VERBOSE:
            if not TRAIN:
                out.write(problem + '\n')
                out.write(answs[k] + "\n")
                out.write(str([x[0] for x in numlist]))
                out.write("\n")
                for x in answers:
                    out.write(x + "\n")
                out.write("___\n")

        if VERBOSE:
            input()
        if not TRAIN:
            continue

        # Prefer equations that end in "= <token>".
        if len([x for x in answers if x.split(" ")[-2] == "="]) > 0:
            answers = [x for x in answers if x.split(" ")[-2] == "="]

        # Train on one randomly chosen equation per problem.
        c = randint(0, len(answers) - 1)
        answers = [answers[c]]
        for j, eq in enumerate(answers):
            trips = []
            print(j, eq)
            l, r = [x.strip().split(' ') for x in eq.split('=')]

            # ``simplex`` is ``l`` when it holds a single token, otherwise
            # ``r``; ``compound`` is the other side.
            compound = r if len(l) == 1 else l
            simplex = l if len(l) == 1 else r
            target = simplex[0]
            target = (target, objs[target])

            #find innermost parens?
            # Collapse ``compound`` one binary operation at a time until a
            # single token is left.
            while len(compound) > 1:
                if "(" in compound:
                    # rpidx: position of the last "(" token; lpidx: position
                    # of the first ")" token at or after it.
                    rpidx = (len(compound) - 1) - compound[::-1].index('(')
                    lpidx = rpidx + compound[rpidx:].index(")")
                    subeq = compound[rpidx + 1:lpidx]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = compound[:rpidx] + [substr
                                                   ] + compound[lpidx + 1:]
                else:
                    # No parentheses: take the first three tokens.
                    subeq = compound[0:3]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = [substr] + compound[3:]
                if True:
                    p, op, e = subeq
                    #print(p,op,e)
                    p = objs[p]
                    e = objs[e]
                    op = op.strip()
                    trips.append((op, p, e))
                    pute = (0, makesets.combine(p[1], e[1], op))
                    #print("OPERATION SELECTED: ",op)
                    #p.details()
                    #e.details()
                    #print(substr,pute[1].num)
                    # Register the collapsed sub-expression under its string
                    # form so that later steps can look it up like an operand.
                    objs[substr] = pute
                # ``pute`` is always the 2-tuple built above, so this
                # comparison is never true.
                if pute == -1:
                    exit()
            # Final "=" triple, keeping the left-hand side first.
            if simplex == l:
                trips.append(("=", objs[simplex[0]], objs[compound[0]]))
            else:
                trips.append(("=", objs[compound[0]], objs[simplex[0]]))
            t = training(trips, problem, target)
            for op in t:
                bigtexamples[op][0].extend(t[op][0])
                bigtexamples[op][1].extend(t[op][1])
            # Runs after the loop, so it reports only the last ``op`` value.
            print(op, len(bigtexamples[op][0]))
    if TRAIN:
        pickle.dump(bigtexamples, open('data/' + OUT + ".training", 'wb'))
Beispiel #11
0
def make_eq(q, a, equations):
    """Collect operator training examples for pre-parsed problems.

    Each problem in ``q`` is preprocessed, its parse and labelled equations
    are loaded through ``utils`` using ``equations[k]``, and every equation
    labelled ``1`` is reduced to ``(op, left, right)`` triples that are fed to
    ``training``. The accumulated examples are pickled to
    ``'data/' + sys.argv[1][-1] + ".local.training"``.

    ``a`` is accepted but not used.
    """
    examples = {sym: ([], []) for sym in ["+", "*", '/', '-', '=']}
    # Tokens of an equation string that are not operands.
    non_operands = ['(', ')', '+', '-', '/', '*', '=']

    for k, raw_problem in enumerate(q):

        # First preprocessing, tokenize slightly
        problem = utils.preprocess_problem(raw_problem)
        print(k)
        print(problem)

        story = utils.read_parse(int(equations[k]))
        labelled = utils.get_k_eqs(equations[k])
        answers = [item[1] for item in labelled if item[0] == 1]
        if not answers:
            continue
        answers = list(set(answers))
        print(story["sentences"][0]["text"])
        print(answers)

        # make story
        sets = makesets.makesets(story['sentences'])

        with_x = [entry for entry in sets if entry[1].num == 'x']
        if not with_x:
            print("NO X WHY")
            continue

        cleaned = [(utils.cleannum(s.num), s) for _, s in sets]
        numlist = [pair for pair in cleaned if pair[0] != '']
        objs = {}
        for num, s in numlist:
            objs[num] = (0, s)
        print(objs.items())

        # Every operand of the first equation has to be a known number set.
        consts = [tok for tok in answers[0].split(" ")
                  if tok not in non_operands]
        present = [tok for tok in consts if tok in objs]
        if present != consts:
            print(present, consts)
            print("missing thing")
            exit()

        for j, eq in enumerate(answers):
            trips = []
            print(j, eq)
            l, r = [side.strip().split(' ') for side in eq.split('=')]
            target = ('x', objs['x'])

            # Collapse each side one binary operation at a time.
            for compound in (l, r):
                while len(compound) > 1:
                    if "(" in compound:
                        # Position of the last "(" token and of the first
                        # ")" token at or after it.
                        start = (len(compound) - 1) - compound[::-1].index('(')
                        stop = start + compound[start:].index(")")
                        subeq = compound[start + 1:stop]
                        head, tail = compound[:start], compound[stop + 1:]
                    else:
                        subeq = compound[0:3]
                        head, tail = [], compound[3:]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = head + [substr] + tail

                    left_key, op, right_key = subeq
                    p = objs[left_key]
                    e = objs[right_key]
                    op = op.strip()
                    trips.append((op, p, e))
                    # Register the collapsed sub-expression so later steps
                    # can look it up like any other operand.
                    objs[substr] = (0, makesets.combine(p[1], e[1], op))

            t = training(trips, problem, story, target)
            for op in t:
                inputs, labels = examples[op]
                inputs.extend(t[op][0])
                labels.extend(t[op][1])

    out_path = 'data/' + sys.argv[1][-1] + ".local.training"
    with open(out_path, 'wb') as f:
        pickle.dump(examples, f)
def make_eq(q):
    """Collect per-operator training examples from gold equations.

    ``parse(q)`` must return two parallel sequences ``(wps, eqs)``: for
    index ``k``, ``wps[k][0]`` is the problem text and ``eqs[k]`` is an
    iterable of equation strings whose tokens are separated by single
    spaces.  Each equation is reduced one binary operation at a time; every
    reduction yields an ``(op, left, right)`` triple, and a final ``"="``
    triple links the reduced side with the single-token side.  The triples
    are passed to ``training`` and its per-operator results are accumulated.

    Args:
        q: Forwarded unchanged to ``parse``.

    Side effects:
        Prints progress to stdout and writes the accumulated examples to
        ``data/gold_training.pickle``.

    NOTE(review): ``problematic`` is not defined inside this function; it is
    presumably a module-level writable file object -- confirm.
    """
    bigtexamples = {x: ([], []) for x in ["+", "*", '/', '-', '=']}
    wps, eqs = parse(q)

    for k in range(len(wps)):
        if not wps[k]:
            continue
        problem = wps[k][0].lower()
        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        print(sets)

        # Drop entries that share an ``idx`` with the entry at position i.
        # When one of the duplicates has a digit in its ``num`` it is the one
        # kept; otherwise the first duplicate is kept.
        i = 0
        while i < len(sets):
            dups = [
                y for y in sets
                if y[1].idx is not None and y[1].idx == sets[i][1].idx
            ]
            if len(dups) > 1:
                good = [
                    y for y in dups if any(ch.isdigit() for ch in y[1].num)
                ]
                if good:
                    for extra in [x for x in dups if x != good[0]]:
                        sets.remove(extra)
                else:
                    # No numeric candidate: just keep the first one.
                    for extra in dups[1:]:
                        sets.remove(extra)
            i += 1

        if not any(entry[1].num == 'x' for entry in sets):
            problematic.write('no x :' + problem)
            continue

        # TODO look for 2 xes

        # Map every cleaned, non-empty number string to a (0, set) pair.
        # The placeholder name differs from ``k`` so the problem index is
        # never shadowed.
        numlist = [(cleannum(v.num), v) for _, v in sets]
        numlist = [x for x in numlist if x[0] != '']
        objs = {name: (0, v) for name, v in numlist}
        answers = eqs[k]

        for j, eq in enumerate(answers):
            trips = []
            print(j, eq)
            lhs, rhs = [x.strip().split(' ') for x in eq.split('=')]

            # ``simplex`` is the single-token side, ``compound`` the side
            # that still has to be reduced.
            compound = rhs if len(lhs) == 1 else lhs
            simplex = lhs if len(lhs) == 1 else rhs
            target = simplex[0]
            target = (target, objs[target])

            while len(compound) > 1:
                if "(" in compound:
                    # Collapse the group opened by the last "(" first; it
                    # must contain exactly three tokens (a op b).
                    rpidx = (len(compound) - 1) - compound[::-1].index('(')
                    lpidx = rpidx + compound[rpidx:].index(")")
                    subeq = compound[rpidx + 1:lpidx]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = (
                        compound[:rpidx] + [substr] + compound[lpidx + 1:]
                    )
                else:
                    # No parentheses left: fold the first three tokens.
                    subeq = compound[0:3]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = [substr] + compound[3:]
                p, op, e = subeq
                p = objs[p]
                e = objs[e]
                op = op.strip()
                trips.append((op, p, e))
                # Register the combined set under its parenthesised name so
                # later reductions can look it up like any other operand.
                objs[substr] = (0, makesets.combine(p[1], e[1], op))

            if simplex == lhs:
                trips.append(("=", objs[simplex[0]], objs[compound[0]]))
            else:
                trips.append(("=", objs[compound[0]], objs[simplex[0]]))
            t = training(trips, problem, target)
            for op in t:
                bigtexamples[op][0].extend(t[op][0])
                bigtexamples[op][1].extend(t[op][1])
            print(op, len(bigtexamples[op][0]))

    # Context manager guarantees the pickle is flushed and closed.
    with open('data/gold_training.pickle', 'wb') as out:
        pickle.dump(bigtexamples, out)
Beispiel #13
0
def dotrain():
    """Derive equations for each word problem and pickle training examples.

    Problems and numeric answers are read line by line from ``sys.argv[1]``
    and ``sys.argv[2]`` when command-line arguments are present, otherwise
    from ``emnlp_noIrrelev_p.txt`` / ``emnlp_noIrrelev_a.txt``.  For each
    problem a ``Solver`` proposes equation strings that reach the answer;
    equations with ``=`` as the second or second-to-last token are kept and,
    when possible, narrowed to those where ``x`` sits at the same operand
    position as in the problem text.  Every remaining equation is reduced
    into ``(op, left, right)`` triples that are fed to ``training``.

    Side effects:
        Prints progress, writes skipped problems to ``nogoodtrainproblems``
        and pickles the accumulated examples to
        ``data/dev_training.pickle``.  All files are closed on exit.
    """
    use_cli = len(sys.argv) > 1
    with open(sys.argv[1] if use_cli else "emnlp_noIrrelev_p.txt") as fh:
        wps = fh.readlines()
    with open(sys.argv[2] if use_cli else "emnlp_noIrrelev_a.txt") as fh:
        answs = fh.readlines()

    bigtexamples = {x: ([], []) for x in ["+", "*", '/', '-', '=']}
    replacements = {
        ' two ': ' 2 ',
        " three ": ' 3 ',
        ' four ': ' 4 ',
        ' five ': ' 5 ',
        ' six ': ' 6 ',
        ' seven ': ' 7 ',
        ' eight ': ' 8 ',
        ' nine ': ' 9 ',
        ' ten ': ' 10 ',
        ' eleven ': ' 11 ',
        ' week ': ' 7 days ',
        ' dozen ': ' 12 of ',
        ' dozens ': ' 12 ',
        ' twice ': ' 2 ',
    }
    # Tokens of an equation string that are not operands.
    symbols = ['+', '-', '/', '=', ')', '(', '*']

    with open('nogoodtrainproblems', 'w') as problematic:
        for k in range(len(wps)):
            print(k)
            problem = wps[k].lower()
            for word, digits in replacements.items():
                problem = problem.replace(word, digits)
            story = nlp.parse(problem)
            numbs = makesets.makesets(story['sentences'])

            # Placeholder names differ from ``k`` so the problem index that
            # is needed below (``answs[k]``) is never shadowed.
            numlist = [(cleannum(v.num), v) for _, v in numbs]
            numlist = [x for x in numlist if x[0] != '']

            names = {str(name) for name, _ in numlist}
            if 'x' not in names and 'x*' not in names:
                problematic.write('no x :' + problem)
                continue

            objs = {name: (0, v) for name, v in numlist}

            print('start solving')
            print(numlist)
            if len(numlist) < 2:
                problematic.write("not enough numbers : " + problem)
                continue

            ST = Solver([x[0] for x in numlist if x[0] != 'x'])
            answers = ST.solveEquations(float(answs[k]))
            print('done solving')
            # Keep only equations with a single token on one side of "=".
            simpleranswers = [
                x for x in answers
                if x.split(" ")[1] == '=' or x.split(" ")[-2] == "="
            ]
            if not answers:
                continue
            if not simpleranswers:
                print(answers)
                problematic.write("not simple : " + problem)
                continue
            answers = simpleranswers

            # Prefer equations in which "x" appears at the same operand
            # position as in the problem's own number order.
            answervals = [
                x for x in answers[0].split(" ") if x not in symbols
            ]
            numvals = [x[0] for x in numlist if x[0] in answervals]
            xidx = numvals.index("x")
            xrightanswers = [
                x for x in answers
                if [z for z in x.split(" ")
                    if z not in symbols].index('x') == xidx
            ]
            if xrightanswers:
                answers = xrightanswers

            for j, eq in enumerate(answers):
                trips = []
                print(j, eq)
                lhs, rhs = [x.strip().split(' ') for x in eq.split('=')]

                compound = lhs if len(rhs) == 1 else rhs
                simplex = lhs if len(lhs) == 1 else rhs
                target = simplex[0]
                target = (target, objs[target])

                while len(compound) > 1:
                    if "(" in compound:
                        # Collapse the group opened by the last "(" first;
                        # it must hold exactly three tokens (a op b).
                        rpidx = (
                            (len(compound) - 1) - compound[::-1].index('(')
                        )
                        lpidx = rpidx + compound[rpidx:].index(")")
                        subeq = compound[rpidx + 1:lpidx]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = (
                            compound[:rpidx] + [substr]
                            + compound[lpidx + 1:]
                        )
                    else:
                        # No parentheses left: fold the first three tokens.
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    p, op, e = subeq
                    p = objs[p]
                    e = objs[e]
                    op = op.strip()
                    trips.append((op, p, e))
                    # Make the combined set addressable by its new name.
                    objs[substr] = (0, makesets.combine(p[1], e[1], op))

                if simplex == lhs:
                    trips.append(
                        ("=", objs[simplex[0]], objs[compound[0]]))
                else:
                    trips.append(
                        ("=", objs[compound[0]], objs[simplex[0]]))
                t = training(trips, problem, target)
                for op in t:
                    bigtexamples[op][0].extend(t[op][0])
                    bigtexamples[op][1].extend(t[op][1])
                print(op, len(bigtexamples[op][0]))

    with open('data/dev_training.pickle', 'wb') as out:
        pickle.dump(bigtexamples, out)
Beispiel #14
0
def make_eq(q, a, equations):
    """Build whole-equation feature rows and write them to a data file.

    For each problem ``q[k]`` the candidate equations come from
    ``get_k_eqs(equations[k])`` as ``(label, equation_string)`` pairs.  Both
    sides of every candidate are reduced to a single combined set, and
    ``training`` turns the pair of sides into one feature row.

    Args:
        q: Sequence of problem texts.
        a: Unused; kept for interface compatibility.
        equations: Sequence indexed like ``q``; each item is passed to
            ``get_k_eqs``.

    Side effects:
        Prints progress and writes one line per row to
        ``data/<last char of sys.argv[1]>.global.data`` in the form
        ``row[0] 1:row[1] 2:row[2] ...`` (looks like LIBSVM sparse format --
        confirm against the consumer).
    """
    # Structural tokens of an equation string ("=" is handled separately).
    operators = ['(', ')', '+', '-', '/', '*']
    tdata = []

    for k in range(len(q)):
        answers = get_k_eqs(equations[k])
        if not answers:
            continue
        answers = list(set(answers))

        # Light tokenisation: split a trailing ",", "." or "?" off a word.
        words = [
            w[:-1] + " " + w[-1] if w and w[-1] in (',', '.', '?') else w
            for w in q[k].strip().split(" ")
        ]
        problem = " " + ' '.join(words) + " "
        print(problem)

        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])

        if not any(entry[1].num == 'x' for entry in sets):
            print("NO X WHY")
            continue

        # TODO look for 2 xes

        numlist = [(cleannum(v.num), v) for _, v in sets]
        numlist = [x for x in numlist if x[0] != '']
        objs = {name: (0, v) for name, v in numlist}
        print(objs.items())

        first_tokens = answers[0][1].split(" ")
        consts = [
            x for x in first_tokens if x not in operators and x != '='
        ]
        present = [x for x in consts if x in objs]
        if consts != present:
            print(present, consts)
            print("missing thing")
            continue
        # 1 when the equation lists its operands in problem order.
        order = int(consts == [x[0] for x in numlist])

        # The operands adjacent to "=" in the first candidate equation.
        # They depend only on ``answers[0]``, so they are computed once
        # instead of once per candidate.
        # NOTE(review): every candidate gets the pair from answers[0], not
        # from its own equation -- confirm this is intended.
        halves = " ".join(
            x for x in first_tokens if x not in operators).split(" = ")
        sp = (objs[halves[0].split(" ")[-1]][1],
              objs[halves[1].split(" ")[0]][1])

        target = ('x', objs['x'])

        for j, eq in answers:
            print(j, eq)
            lhs, rhs = [x.strip().split(' ') for x in eq.split('=')]

            sides = []
            for compound in (lhs, rhs):
                while len(compound) > 1:
                    if "(" in compound:
                        # Collapse the group opened by the last "(" first;
                        # it must hold exactly three tokens (a op b).
                        rpidx = (
                            (len(compound) - 1) - compound[::-1].index('(')
                        )
                        lpidx = rpidx + compound[rpidx:].index(")")
                        subeq = compound[rpidx + 1:lpidx]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = (
                            compound[:rpidx] + [substr]
                            + compound[lpidx + 1:]
                        )
                    else:
                        # No parentheses left: fold the first three tokens.
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    p, op, e = subeq
                    p = objs[p]
                    e = objs[e]
                    op = op.strip()
                    objs[substr] = (0, makesets.combine(p[1], e[1], op))
                sides.append(objs[compound[0]])
            tdata.append(
                training(sides[0], sides[1], problem, story, target, j,
                         order, sp))

    # Context manager guarantees the rows are flushed and the file closed.
    with open("data/" + sys.argv[1][-1] + ".global.data", 'w') as f:
        for v in tdata:
            f.write(str(v[0]) + " ")
            for idx, value in enumerate(v[1:], 1):
                f.write(str(idx) + ":" + str(value) + " ")
            f.write("\n")
Beispiel #15
0
def make_eq(q, a, equations):
    """Gather per-operator training examples from pre-parsed problems.

    For every problem ``q[k]`` the stored parse is loaded through
    ``utils.read_parse`` and the equations labelled ``1`` by
    ``utils.get_k_eqs`` are reduced, on both sides, one binary operation at
    a time.  Each reduction contributes an ``(op, left, right)`` triple;
    the triples go to ``training`` and the returned per-operator lists are
    accumulated.

    Args:
        q: Sequence of raw problem texts.
        a: Not used by this function.
        equations: Sequence indexed like ``q``; each item is converted with
            ``int`` for ``utils.read_parse`` and passed as-is to
            ``utils.get_k_eqs``.

    Side effects:
        Prints progress, terminates the process via ``exit()`` when an
        equation operand has no matching set, and pickles the result to
        ``data/<last char of sys.argv[1]>.local.training``.
    """
    bigtexamples = {}
    for symbol in ["+", "*", '/', '-', '=']:
        bigtexamples[symbol] = ([], [])

    # Tokens that are structure rather than operands.
    non_operands = ['(', ')', '+', '-', '/', '*', '=']

    for k in range(len(q)):

        # First preprocessing, tokenize slightly
        problem = utils.preprocess_problem(q[k])
        print(k)
        print(problem)

        story = utils.read_parse(int(equations[k]))
        labelled = utils.get_k_eqs(equations[k])
        answers = [pair[1] for pair in labelled if pair[0] == 1]
        if not answers:
            continue
        answers = list(set(answers))
        print(story["sentences"][0]["text"])
        print(answers)

        sets = makesets.makesets(story['sentences'])

        # Skip problems in which no set carries the unknown.
        unknown_at = [
            pos for pos, entry in enumerate(sets) if entry[1].num == 'x'
        ]
        if len(unknown_at) == 0:
            print("NO X WHY")
            continue

        # Cleaned number string -> (0, set), ignoring empty strings.
        numlist = []
        for _, quantity in sets:
            cleaned = utils.cleannum(quantity.num)
            if cleaned != '':
                numlist.append((cleaned, quantity))
        objs = {}
        for cleaned, quantity in numlist:
            objs[cleaned] = (0, quantity)
        print(objs.items())

        consts = []
        for token in answers[0].split(" "):
            if token not in non_operands:
                consts.append(token)
        present = [token for token in consts if token in objs]
        if present != consts:
            print(present, consts)
            print("missing thing")
            exit()

        for j, eq in enumerate(answers):
            trips = []
            print(j, eq)
            lhs, rhs = [half.strip().split(' ') for half in eq.split('=')]
            target = ('x', objs['x'])

            # Reduce the left side, then the right side.
            for compound in (lhs, rhs):
                while len(compound) > 1:
                    if "(" not in compound:
                        # No parentheses left: fold the first three tokens.
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    else:
                        # Collapse the group opened by the last "(".
                        start = len(compound) - 1 - compound[::-1].index('(')
                        stop = start + compound[start:].index(")")
                        subeq = compound[start + 1:stop]
                        substr = "(" + ''.join(subeq) + ")"
                        head = compound[:start]
                        tail = compound[stop + 1:]
                        compound = head + [substr] + tail

                    p, op, e = subeq
                    p = objs[p]
                    e = objs[e]
                    op = op.strip()
                    trips.append((op, p, e))
                    pute = (0, makesets.combine(p[1], e[1], op))
                    objs[substr] = pute
                    if pute == -1:
                        exit()

            t = training(trips, problem, story, target)
            for op in t:
                inputs, labels = bigtexamples[op][0], bigtexamples[op][1]
                inputs.extend(t[op][0])
                labels.extend(t[op][1])

    out_path = 'data/' + sys.argv[1][-1] + ".local.training"
    with open(out_path, 'wb') as f:
        pickle.dump(bigtexamples, f)
Beispiel #16
0
def make_eq(q, a, equations):
    """Gather per-operator training examples from equations ending in ``= x``.

    For each problem ``q[k]`` the candidates are the equation strings that
    ``get_k_eqs(equations[k])`` labels ``1`` and whose last two
    whitespace-separated tokens are ``=`` and ``x``.  Both sides of every
    candidate are reduced one binary operation at a time; each reduction
    contributes an ``(op, left, right)`` triple, and the triples are passed
    to ``training`` whose per-operator results are accumulated.

    Args:
        q: Sequence of problem texts.
        a: Unused; kept for interface compatibility.
        equations: Sequence indexed like ``q``; each item is passed to
            ``get_k_eqs``.

    Side effects:
        Prints progress and pickles the accumulated examples to
        ``data/ixl.local.training``.
    """
    bigtexamples = {x: ([], []) for x in ["+", "*", '/', '-', '=']}
    non_operands = ['(', ')', '+', '-', '/', '*', '=']

    for k in range(len(q)):
        eqs = get_k_eqs(equations[k])
        answers = [x[1] for x in eqs if x[0] == 1]
        # Slice comparison keeps "... = x" equations and, unlike indexing
        # with [-2], cannot raise on a string with fewer than two tokens.
        answers = [x for x in answers if x.split()[-2:] == ['=', 'x']]
        if not answers:
            continue
        answers = list(set(answers))

        # Light tokenisation: split a trailing ",", "." or "?" off a word.
        words = [
            w[:-1] + " " + w[-1] if w and w[-1] in (',', '.', '?') else w
            for w in q[k].strip().split(" ")
        ]
        problem = " " + ' '.join(words) + " "
        print(k)
        print(problem)

        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])

        if not any(entry[1].num == 'x' for entry in sets):
            print("NO X WHY")
            continue

        numlist = [(cleannum(v.num), v) for _, v in sets]
        numlist = [x for x in numlist if x[0] != '']
        objs = {name: (0, v) for name, v in numlist}
        print(objs.items())

        # Every operand of the first equation must have a matching set.
        consts = [x for x in answers[0].split(" ") if x not in non_operands]
        present = [x for x in consts if x in objs]
        if present != consts:
            print(present, consts)
            print("missing thing")
            continue

        for j, eq in enumerate(answers):
            trips = []
            print(j, eq)
            lhs, rhs = [x.strip().split(' ') for x in eq.split('=')]
            target = ('x', objs['x'])

            for compound in (lhs, rhs):
                while len(compound) > 1:
                    if "(" in compound:
                        # Collapse the group opened by the last "(" first;
                        # it must hold exactly three tokens (a op b).
                        rpidx = (
                            (len(compound) - 1) - compound[::-1].index('(')
                        )
                        lpidx = rpidx + compound[rpidx:].index(")")
                        subeq = compound[rpidx + 1:lpidx]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = (
                            compound[:rpidx] + [substr]
                            + compound[lpidx + 1:]
                        )
                    else:
                        # No parentheses left: fold the first three tokens.
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    p, op, e = subeq
                    p = objs[p]
                    e = objs[e]
                    op = op.strip()
                    trips.append((op, p, e))
                    # Make the combined set addressable by its new name.
                    objs[substr] = (0, makesets.combine(p[1], e[1], op))

            t = training(trips, problem, story, target, sets)
            for op in t:
                bigtexamples[op][0].extend(t[op][0])
                bigtexamples[op][1].extend(t[op][1])

    # Context manager guarantees the pickle is flushed and closed.
    with open('data/ixl.local.training', 'wb') as out:
        pickle.dump(bigtexamples, out)
Beispiel #17
0
def make_eq(q, a, VERBOSE, TRAIN):
    """Solve word problems into equations; optionally build training data.

    Problems are read line by line from file ``q`` and their numeric answers
    from file ``a``.  For each problem a ``Solver`` proposes equation
    strings that reach the answer; equations with ``=`` as the second or
    second-to-last token are kept and narrowed to those where ``x`` sits at
    the same operand position as in the problem text (falling back to the
    unnarrowed list when none match).

    Args:
        q: Path of the problems file.
        a: Path of the answers file (one number per line, parallel to q).
        VERBOSE: When true, run interactively: list all problems, read the
            index to process from stdin, and pause for input at several
            points.
        TRAIN: When true, pick one surviving equation at random, reduce it
            into ``(op, left, right)`` triples, feed them to ``training``
            and pickle the accumulated result.

    Side effects:
        Prints progress; logs skipped problems to
        ``somethingWrongProblems``; when neither flag is set, writes the
        problem, answer, numbers and equations to ``<q>.out.txt``; when
        ``TRAIN`` is set, writes ``data/<OUT>.training``.  Every file is
        closed on exit.

    NOTE(review): ``OUT`` is not defined inside this function; it is
    presumably a module-level string -- confirm.
    """
    # Local import keeps this block self-contained.
    from contextlib import ExitStack

    bigtexamples = {x: ([], []) for x in ["+", "*", '/', '-', '=']}
    with open(q) as fh:
        wps = fh.readlines()
    with open(a) as fh:
        answs = fh.readlines()

    replacements = {
        ' two ': ' 2 ',
        " three ": ' 3 ',
        ' four ': ' 4 ',
        ' five ': ' 5 ',
        ' six ': ' 6 ',
        ' seven ': ' 7 ',
        ' eight ': ' 8 ',
        ' nine ': ' 9 ',
        ' ten ': ' 10 ',
        ' eleven ': ' 11 ',
        ' twice ': ' 2 ',
    }
    # Tokens of an equation string that are not operands.
    symbols = ["/", "-", '+', '*', '=', '(', ')']

    with ExitStack() as stack:
        # The report file exists only in plain (non-train, non-verbose) runs.
        out = None
        if not TRAIN and not VERBOSE:
            out = stack.enter_context(open(q + ".out.txt", 'w'))
        problematic = stack.enter_context(
            open('somethingWrongProblems', 'w'))

        for k in range(len(wps)):
            if VERBOSE:
                # Interactive mode: the user chooses which problem to run.
                for i in range(len(wps)):
                    print(i, wps[i])
                k = int(input())
            print(k)

            # Light tokenisation: split a trailing ",", "." or "?" off a
            # word, then pad with spaces so whole-word replacement works at
            # both ends.
            words = [
                w[:-1] + " " + w[-1]
                if w and w[-1] in (',', '.', '?') else w
                for w in wps[k].lower().strip().split(" ")
            ]
            problem = " " + ' '.join(words) + " "
            print(problem)

            for word, digits in replacements.items():
                problem = problem.replace(word, digits)

            story = nlp.parse(problem)
            sets = makesets.makesets(story['sentences'])
            print(sets)

            # Drop entries that share an ``idx`` with the entry at
            # position i.  A duplicate with a digit in its ``num`` is the
            # one kept; otherwise the first duplicate is kept.
            i = 0
            while i < len(sets):
                dups = [
                    y for y in sets
                    if y[1].idx is not None and y[1].idx == sets[i][1].idx
                ]
                if len(dups) > 1:
                    good = [
                        y for y in dups
                        if any(ch.isdigit() for ch in y[1].num)
                    ]
                    if good:
                        for extra in [x for x in dups if x != good[0]]:
                            sets.remove(extra)
                    else:
                        # No numeric candidate: just keep the first one.
                        for extra in dups[1:]:
                            sets.remove(extra)
                i += 1

            if not any(entry[1].num == 'x' for entry in sets):
                problematic.write('no x :' + problem)
                continue

            # TODO look for 2 xes

            numlist = [(cleannum(v.num), v) for _, v in sets]
            numlist = [x for x in numlist if x[0] != '']
            if VERBOSE:
                for _, v in numlist:
                    v.details()
                input()

            objs = {name: (0, v) for name, v in numlist}

            print('start solving')
            print(numlist)
            if len(numlist) < 2:
                problematic.write("not enough numbers : " + problem)
                continue

            values = [x[0] for x in numlist if x[0] != 'x']
            print(values)
            ST = Solver(values)

            answers = ST.solveEquations(float(answs[k]))
            if not answers:
                problematic.write("No answers : " + problem + "\n")
                problematic.write(str([x[0] for x in numlist]) + '\n')
                problematic.write(answs[k] + '\n')
                continue
            print('done solving')

            # Keep only equations with a single token on one side of "=".
            simpleranswers = [
                x for x in answers
                if x.split(" ")[1] == '=' or x.split(" ")[-2] == "="
            ]
            if not simpleranswers:
                problematic.write("not simple : " + problem + "\n")
                continue
            answers = simpleranswers[:]

            # Discard equations whose "x" is not at the operand position it
            # has in the problem text.
            values = [x[0] for x in numlist]
            xidx = values.index('x')
            print(xidx)
            for candidate in simpleranswers:
                aspl = [
                    x for x in candidate.split(" ") if x not in symbols
                ]
                print(candidate)
                print(aspl)
                print(values)
                aidx = aspl.index('x')
                print(aidx)
                if aidx != xidx:
                    print("removing ", candidate)
                    answers.remove(candidate)
            print(answers)
            if answers == []:
                # Nothing matched the position: fall back to all of them.
                answers = simpleranswers

            print(answers)
            if out is not None:
                out.write(problem + '\n')
                out.write(answs[k] + "\n")
                out.write(str([x[0] for x in numlist]))
                out.write("\n")
                for x in answers:
                    out.write(x + "\n")
                out.write("___\n")

            if VERBOSE:
                input()
            if not TRAIN:
                continue

            # Train on a single, randomly chosen equation per problem.
            eq = answers[randint(0, len(answers) - 1)]
            trips = []
            print(0, eq)
            lhs, rhs = [x.strip().split(' ') for x in eq.split('=')]

            compound = rhs if len(lhs) == 1 else lhs
            simplex = lhs if len(lhs) == 1 else rhs
            target = simplex[0]
            target = (target, objs[target])

            while len(compound) > 1:
                if "(" in compound:
                    # Collapse the group opened by the last "(" first; it
                    # must hold exactly three tokens (a op b).
                    rpidx = (len(compound) - 1) - compound[::-1].index('(')
                    lpidx = rpidx + compound[rpidx:].index(")")
                    subeq = compound[rpidx + 1:lpidx]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = (
                        compound[:rpidx] + [substr] + compound[lpidx + 1:]
                    )
                else:
                    # No parentheses left: fold the first three tokens.
                    subeq = compound[0:3]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = [substr] + compound[3:]
                p, op, e = subeq
                p = objs[p]
                e = objs[e]
                op = op.strip()
                trips.append((op, p, e))
                # Make the combined set addressable by its new name.
                objs[substr] = (0, makesets.combine(p[1], e[1], op))

            if simplex == lhs:
                trips.append(("=", objs[simplex[0]], objs[compound[0]]))
            else:
                trips.append(("=", objs[compound[0]], objs[simplex[0]]))
            t = training(trips, problem, target)
            for op in t:
                bigtexamples[op][0].extend(t[op][0])
                bigtexamples[op][1].extend(t[op][1])
            print(op, len(bigtexamples[op][0]))

    if TRAIN:
        with open('data/' + OUT + ".training", 'wb') as fh:
            pickle.dump(bigtexamples, fh)