コード例 #1
0
ファイル: ILPformat.py プロジェクト: MathProblems/August
def make_eq(q, a, VERBOSE, TRAIN):
    """Parse each word problem and pass its extracted sets to ``EF.main``.

    Args:
        q: Sequence of word-problem strings.
        a: Sequence indexed like ``q``; ``a[k]`` is forwarded to ``EF.main``.
        VERBOSE: When truthy, every iteration lists all problems and reads
            the index of the problem to process from standard input.
        TRAIN: Accepted for interface compatibility; not read here.
    """
    wps = q
    trailing_marks = (',', '.', '?')

    for k in range(len(wps)):
        if VERBOSE:
            # Interactive mode: show the menu, then let the user override k.
            for idx, text in enumerate(wps):
                print(idx, text)
            k = int(input())
        print(k)

        # Light tokenization: detach a trailing ',', '.' or '?' from its word
        # and pad the whole problem with one space on each side.
        words = [
            w[:-1] + " " + w[-1] if w and w[-1] in trailing_marks else w
            for w in wps[k].strip().split(" ")
        ]
        problem = " " + ' '.join(words) + " "
        print(problem)

        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        EF.main(sets, k, a[k], sys.argv[1])

        # Keep only the sets whose quantity passes makesets.floatcheck or is
        # the literal 'x'.
        kept = []
        for entry in sets:
            if makesets.floatcheck(entry[1].num) or entry[1].num == 'x':
                kept.append(entry)
        print(kept)
        for entry in kept:
            entry[1].details()
コード例 #2
0
ファイル: ilp_output.py プロジェクト: MathProblems/may2
def make_eq(q, a, VERBOSE, TRAIN, fold):
    """Lower-case, tokenize and parse each problem, then call ``EF.main``.

    Args:
        q: Sequence of word-problem strings.
        a: Sequence indexed like ``q``; ``a[k]`` is forwarded to ``EF.main``.
        VERBOSE: When truthy, every iteration lists all problems and reads
            the index of the problem to process from standard input.
        TRAIN: Accepted for interface compatibility; not read here.
        fold: Accepted for interface compatibility; not read here.
    """
    wps = q

    for k in range(len(wps)):
        if VERBOSE:
            # Interactive mode: print the menu and let the user pick k.
            for idx, text in enumerate(wps):
                print(idx, text)
            k = int(input())
        print(k)

        # Light tokenization: split a trailing ',', '.' or '?' off its word.
        words = []
        for word in wps[k].lower().strip().split(" "):
            if word and word[-1] in (',', '.', '?'):
                word = word[:-1] + " " + word[-1]
            words.append(word)
        problem = " " + ' '.join(words) + " "
        print(problem)

        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        EF.main(sets, k, a[k])
コード例 #3
0
def make_eq(q, a, VERBOSE, TRAIN):
    """Print each problem, its answer and the details of its filtered sets.

    Args:
        q: Sequence of word-problem strings.
        a: Sequence indexed like ``q``; ``a[k].strip()`` is printed as the
            answer.
        VERBOSE: When truthy, every iteration lists all problems and reads
            the index of the problem to process from standard input.
        TRAIN: Accepted for interface compatibility; not read here.
    """
    wps = q
    punctuation = (',', '.', '?')

    for k in range(len(wps)):
        if VERBOSE:
            for idx, text in enumerate(wps):
                print(idx, text)
            k = int(input())
        print(f"Index: {k}")

        # Light tokenization: separate trailing punctuation from its word.
        pieces = wps[k].strip().split(" ")
        for pos, piece in enumerate(pieces):
            if not piece:
                continue
            if piece[-1] in punctuation:
                pieces[pos] = piece[:-1] + " " + piece[-1]
        problem = " " + ' '.join(pieces) + " "
        print("Problem: " + problem)
        print("Answer: " + a[k].strip())

        story = nlp.parse(problem)
        # Keep only sets whose quantity passes makesets.floatcheck or is 'x'.
        sets = [
            entry for entry in makesets.makesets(story['sentences'])
            if makesets.floatcheck(entry[1].num) or entry[1].num == 'x'
        ]

        # Not read afterwards here; kept because it touches .entity on
        # every remaining set, exactly as the original did.
        ents = [entry[1].entity for entry in sets]
        print("--- SETS ---")
        for entry in sets:
            entry[1].details()
コード例 #4
0
ファイル: ILPformatSiena.py プロジェクト: MathProblems/August
def make_eq(q, a, VERBOSE, TRAIN):
    """Load each problem's cached parse, pickle its sets and run ``EF.main``.

    For every index ``k`` the sets built by ``makesets.makesets`` from
    ``read_parse(k)`` are pickled to ``madesets/<k>.pickle`` and passed to
    ``EF.main`` together with ``a[k]`` and ``sys.argv[1]``.  Afterwards the
    sets whose quantity passes ``makesets.floatcheck`` or equals ``'x'`` are
    printed with their details.

    Args:
        q: Sequence of word-problem strings.
        a: Sequence indexed like ``q``; ``a[k]`` is forwarded to ``EF.main``.
        VERBOSE: When truthy, every iteration lists all problems and reads
            the index of the problem to process from standard input.
        TRAIN: Accepted for interface compatibility; not read here.
    """
    wps = q

    for k in range(len(wps)):
        if VERBOSE:
            # Interactive mode: show the menu, then let the user override k.
            for i, text in enumerate(wps):
                print(i, text)
            k = int(input())
        print(k)

        # First preprocessing, tokenize slightly: detach a trailing ',', '.'
        # or '?' from its word.  The result is only printed; the parse itself
        # comes from read_parse(k).
        problem = wps[k].strip().split(" ")
        for i, x in enumerate(problem):
            if x and x[-1] in (',', '.', '?'):
                problem[i] = x[:-1] + " " + x[-1]
        problem = " " + ' '.join(problem) + " "
        print(problem)

        story = read_parse(k)
        sets = makesets.makesets(story['sentences'])
        # Use a context manager so the pickle file is flushed and closed
        # before the next problem instead of leaking one handle per problem.
        with open('madesets/' + str(k) + '.pickle', 'wb') as handle:
            pickle.dump(sets, handle)
        EF.main(sets, k, a[k], sys.argv[1])

        sets = [
            x for x in sets
            if makesets.floatcheck(x[1].num) or x[1].num == 'x'
        ]
        print(sets)
        for z in sets:
            z[1].details()
コード例 #5
0
ファイル: ILPformatSiena.py プロジェクト: rootcanal/August
def make_eq(q, a, VERBOSE, TRAIN):
    """Run ``EF.main`` on the cached parse of every problem in ``q``.

    Args:
        q: Sequence of word-problem strings.
        a: Sequence indexed like ``q``; ``a[k]`` is forwarded to ``EF.main``.
        VERBOSE: When truthy, every iteration lists all problems and reads
            the index of the problem to process from standard input.
        TRAIN: Accepted for interface compatibility; not read here.
    """
    wps = q

    for k in range(len(wps)):
        if VERBOSE:
            # Interactive mode: show the menu, then let the user override k.
            for idx, text in enumerate(wps):
                print(idx, text)
            k = int(input())
        print(k)

        # The preprocessed text is only echoed; parsing uses the index.
        print(utils.preprocess_problem(wps[k]))

        story = utils.read_parse(k)
        sets = makesets.makesets(story['sentences'])
        EF.main(sets, k, a[k], sys.argv[1])

        # Keep only sets whose quantity passes makesets.floatcheck or is 'x'.
        kept = []
        for entry in sets:
            if makesets.floatcheck(entry[1].num) or entry[1].num == 'x':
                kept.append(entry)
        print(kept)
        for entry in kept:
            entry[1].details()
コード例 #6
0
ファイル: ILPformatSiena.py プロジェクト: rootcanal/August
def make_eq(q, a, VERBOSE, TRAIN):
    """Feed the sets of each problem's stored parse to ``EF.main``.

    Args:
        q: Sequence of word-problem strings.
        a: Sequence indexed like ``q``; ``a[k]`` is forwarded to ``EF.main``.
        VERBOSE: When truthy, every iteration lists all problems and reads
            the index of the problem to process from standard input.
        TRAIN: Accepted for interface compatibility; not read here.
    """
    wps = q

    def _is_kept(entry):
        # A set survives when its quantity passes makesets.floatcheck or is
        # the literal 'x'.
        return makesets.floatcheck(entry[1].num) or entry[1].num == 'x'

    for k in range(len(wps)):
        if VERBOSE:
            # Interactive mode: print every problem, then read the index.
            for position, text in enumerate(wps):
                print(position, text)
            k = int(input())
        print(k)

        # First preprocessing, tokenize slightly (echoed only).
        problem = utils.preprocess_problem(wps[k])
        print(problem)

        sentences = utils.read_parse(k)['sentences']
        sets = makesets.makesets(sentences)
        EF.main(sets, k, a[k], sys.argv[1])

        sets = [entry for entry in sets if _is_kept(entry)]
        print(sets)
        for entry in sets:
            entry[1].details()
コード例 #7
0
ファイル: ash_output.py プロジェクト: MathProblems/may2
def make_eq(q,a,VERBOSE,TRAIN):
    """Read problems from file ``q``, normalise and parse them, run ``EF.main``.

    Args:
        q: Path of a text file; its lines are the word problems.
        a: Not read in the lines shown here.
        VERBOSE: When truthy, every iteration lists all problems and reads
            the index of the problem to process from standard input.
        TRAIN: Together with ``VERBOSE`` decides whether ``q + ".out.txt"``
            is opened for writing.

    NOTE(review): this snippet appears to be cut off after the ``EF.main``
    call; ``bigtexamples``, ``out`` and ``problematic`` are not used in the
    visible lines, and none of the opened files is closed here.
    """
    bigtexamples = {x:([],[]) for x in ["+","*",'/','-','=']}
    wps = open(q).readlines()
    #answs = open(a).readlines()
    #VERBOSE=True
    # The output file is only opened in non-interactive, non-training runs.
    if not TRAIN and not VERBOSE:
        out = open(q+".out.txt",'w')
    # Opened in 'w' mode, so any previous content is truncated on every call.
    problematic = open('somethingWrongProblems','w')


    

    # Space-delimited number words and the digits that replace them.
    replacements = {' two ':' 2 '," three ":' 3 ',' four ':' 4 ',' five ':' 5 ',' six ':' 6 ',' seven ':' 7 ',' eight ':' 8 ',' nine ':' 9 ',' ten ':' 10 ',' eleven ':' 11 ', ' twice ':' 2 '}

    for k in range(len(wps)):
        if VERBOSE:
            # Interactive mode: show the menu, then let the user override k.
            for i in range(len(wps)):
                print(i,wps[i])
            k = int(input())
        print(k)
        problem = wps[k].lower()
        #First preprocessing, tokenize slightly
        # Detach a trailing ',', '.' or '?' from its word, then pad the whole
        # problem with one space on each side so the space-delimited keys of
        # `replacements` can also match the first and last word.
        problem = problem.strip().split(" ")
        for i,x in enumerate(problem):
            if len(x)==0:continue
            if x[-1] in [',','.','?']:
                problem[i] = x[:-1]+" "+x[-1]
        problem = ' '.join(problem)
        problem = " " + problem + " "
        print(problem)


        for r in replacements:
            problem = problem.replace(r,replacements[r])
        
        # The bare string below is disabled code (it only applied the
        # replacements to the text before " how "); it has no runtime effect.
        '''
        if " how " in problem:
            left,right = problem.split(" how ")
        else: left = problem

        for r in replacements:
            left = left.replace(r,replacements[r])
        if " how " in problem:
            problem = left + ' how ' + right
        else:
            problem = left
        '''

        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        EF.main(sets,k)

        '''
コード例 #8
0
ファイル: ash_output.py プロジェクト: MathProblems/may2
def make_eq(q, a, VERBOSE, TRAIN):
    """Normalise and parse each problem, call ``EF.main``, print its sets.

    Args:
        q: Sequence of word-problem strings.
        a: Sequence indexed like ``q``; ``a[k]`` is forwarded to ``EF.main``.
        VERBOSE: When truthy, every iteration lists all problems and reads
            the index of the problem to process from standard input.
        TRAIN: Accepted for interface compatibility; not read here.
    """
    wps = q

    # Space-delimited number words and the digits that replace them.
    replacements = {
        ' two ': ' 2 ',
        " three ": ' 3 ',
        ' four ': ' 4 ',
        ' five ': ' 5 ',
        ' six ': ' 6 ',
        ' seven ': ' 7 ',
        ' eight ': ' 8 ',
        ' nine ': ' 9 ',
        ' ten ': ' 10 ',
        ' eleven ': ' 11 ',
        ' twice ': ' 2 ',
    }
    punctuation = (',', '.', '?')

    for k in range(len(wps)):
        if VERBOSE:
            # Interactive mode: show the menu, then let the user override k.
            for idx, text in enumerate(wps):
                print(idx, text)
            k = int(input())
        print(k)

        # Light tokenization: detach trailing punctuation and pad with spaces
        # so the space-delimited replacement keys can match edge words.
        words = [
            w[:-1] + " " + w[-1] if w and w[-1] in punctuation else w
            for w in wps[k].lower().strip().split(" ")
        ]
        problem = " " + ' '.join(words) + " "
        print(problem)

        for word, digit in replacements.items():
            problem = problem.replace(word, digit)

        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        EF.main(sets, k, a[k])

        # Keep only sets whose quantity passes makesets.floatcheck or is 'x'.
        kept = []
        for entry in sets:
            if makesets.floatcheck(entry[1].num) or entry[1].num == 'x':
                kept.append(entry)
        print(kept)

        # Not read afterwards here; kept because it touches .entity on
        # every remaining set, exactly as the original did.
        ents = [entry[1].entity for entry in kept]

        for entry in kept:
            entry[1].details()
コード例 #9
0
ファイル: javad_ILPinput.py プロジェクト: MathProblems/August
def make_eq(q, a, e, VERBOSE, TRAIN):
    """Drop annotated-irrelevant sets, pickle the rest and run ``EF.main``.

    For every index ``k`` the sets are built from ``read_parse(k)``.  When
    ``e[k]`` is non-blank it is read as space-separated ``ent,num,v`` records
    whose last character is a label; for each record labelled ``'0'`` (unless
    its entity is ``"$"`` or ``"dollar"``) every set whose ``num`` equals the
    record's ``num`` is removed.  The remaining sets are pickled to
    ``madesets/<k>.pickle`` and passed to ``EF.main``.

    Args:
        q: Sequence of word-problem strings.
        a: Sequence indexed like ``q``; ``a[k]`` is forwarded to ``EF.main``.
        e: Sequence indexed like ``q`` holding the annotation strings.
        VERBOSE: When truthy, every iteration lists all problems and reads
            the index of the problem to process from standard input.
        TRAIN: Accepted for interface compatibility; not read here.

    Raises:
        ValueError: If a ``'0'``-labelled record does not split into exactly
            three comma-separated fields.
    """
    wps = q

    for k in range(len(wps)):
        if VERBOSE:
            # Interactive mode: show the menu, then let the user override k.
            for i, text in enumerate(wps):
                print(i, text)
            k = int(input())
        print(k)

        # First preprocessing, tokenize slightly: detach a trailing ',', '.'
        # or '?' from its word.  The result is only printed; the parse itself
        # comes from read_parse(k).
        problem = wps[k].strip().split(" ")
        for i, x in enumerate(problem):
            if x and x[-1] in (',', '.', '?'):
                problem[i] = x[:-1] + " " + x[-1]
        problem = " " + ' '.join(problem) + " "
        print(problem)

        story = read_parse(k)
        sets = makesets.makesets(story['sentences'])

        annotation = e[k].strip()
        if annotation != '':
            # Skip empty tokens produced by doubled spaces; indexing their
            # last character would raise IndexError.
            records = [tok for tok in annotation.split(" ") if tok]
            imap = [(rec[-1], rec) for rec in records]
            # The original guarded this section with
            # `not all([...]) == '0'`, which compares a bool with '0' and is
            # therefore always true; the section has always run
            # unconditionally, and that behaviour is kept.
            print(imap)
            for label, rec in imap:
                if label != '0':
                    continue
                ent, num, v = rec.split(',')
                if ent in ["$", "dollar"]:
                    continue
                sets = [s for s in sets if s[1].num != num]

        # Use a context manager so the pickle file is flushed and closed
        # before the next problem instead of leaking one handle per problem.
        with open('madesets/' + str(k) + '.pickle', 'wb') as handle:
            pickle.dump(sets, handle)
        EF.main(sets, k, a[k], sys.argv[1])

        sets = [
            x for x in sets
            if makesets.floatcheck(x[1].num) or x[1].num == 'x'
        ]
        print(sets)
        for z in sets:
            z[1].details()
コード例 #10
0
ファイル: makeverbs.py プロジェクト: MathProblems/may2
def make_eq(q, a, VERBOSE, TRAIN, fold):
    """Collect the distinct verbs of all filtered sets and pickle them.

    Every problem is lower-cased, lightly tokenized and parsed; the verbs of
    each set whose quantity passes ``makesets.floatcheck`` or equals ``'x'``
    are gathered.  The de-duplicated list (in arbitrary order) is pickled to
    ``"data/predicates" + fold``.

    Args:
        q: Sequence of word-problem strings.
        a: Accepted for interface compatibility; not read here.
        VERBOSE: When truthy, every iteration lists all problems and reads
            the index of the problem to process from standard input.
        TRAIN: Accepted for interface compatibility; not read here.
        fold: String appended to ``"data/predicates"`` to form the output
            path.
    """
    wps = q
    predicates = []

    for k in range(len(wps)):
        if VERBOSE:
            # Interactive mode: show the menu, then let the user override k.
            for i, text in enumerate(wps):
                print(i, text)
            k = int(input())
        print(k)

        # First preprocessing, tokenize slightly: detach a trailing ',', '.'
        # or '?' from its word and pad with one space on each side.
        problem = wps[k].lower().strip().split(" ")
        for i, x in enumerate(problem):
            if x and x[-1] in (',', '.', '?'):
                problem[i] = x[:-1] + " " + x[-1]
        problem = " " + ' '.join(problem) + " "
        print(problem)

        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        sets = [
            x for x in sets
            if makesets.floatcheck(x[1].num) or x[1].num == 'x'
        ]
        print(sets)
        for z in sets:
            if z[1].verbs:
                predicates.extend(z[1].verbs.split(" "))

    predicates = list(set(predicates))
    # Use a context manager so the pickle is flushed and closed on return
    # instead of relying on garbage collection of an anonymous handle.
    with open("data/predicates" + fold, 'wb') as handle:
        pickle.dump(predicates, handle)
コード例 #11
0
ファイル: train_local.py プロジェクト: MathProblems/may2
def make_eq(q, a, eqs, VERBOSE, TRAIN):
    """Build local training examples from the gold equation of each problem.

    Every problem is lower-cased, lightly tokenized, has its number words
    replaced by digits and is parsed with ``nlp.parse``.  The gold equation
    ``eqs[k]`` is then reduced step by step into ``(op, left, right)``
    triples which are passed to ``training``; the per-operator examples it
    returns are accumulated.  When ``TRAIN`` is truthy the accumulated
    examples are pickled to ``'data/' + OUT + '.local.training'``.

    Problems in which no set has ``num == 'x'`` are logged to the file
    ``somethingWrongProblems`` (truncated on every call) and skipped.

    Args:
        q: Sequence of word-problem strings.
        a: Accepted for interface compatibility; not read here.
        eqs: Sequence indexed like ``q`` with one equation string per
            problem; entries whose stripped text is ``"None"`` are skipped.
        VERBOSE: When truthy, every iteration lists all problems, reads the
            index of the problem to process from standard input, and pauses
            after printing the set details.
        TRAIN: When truthy, pickle the accumulated examples at the end.
    """
    bigtexamples = {x: ([], []) for x in ["+", "*", '/', '-', '=']}
    wps = q
    # The original opened `q + ".out.txt"` here when neither TRAIN nor
    # VERBOSE was set.  `q` is a sequence of problems, not a path, so that
    # raised TypeError, and the handle was never written to.  It is removed.

    # Space-delimited number words and the digits that replace them.
    replacements = {
        ' two ': ' 2 ',
        " three ": ' 3 ',
        ' four ': ' 4 ',
        ' five ': ' 5 ',
        ' six ': ' 6 ',
        ' seven ': ' 7 ',
        ' eight ': ' 8 ',
        ' nine ': ' 9 ',
        ' ten ': ' 10 ',
        ' eleven ': ' 11 ',
        ' twice ': ' 2 '
    }

    # The context manager guarantees the log is flushed and closed, including
    # when exit() is reached inside the loop.
    with open('somethingWrongProblems', 'w') as problematic:
        for k in range(len(wps)):
            print(eqs[k])
            if eqs[k].strip() == "None":
                continue
            # The gold equation is taken before any interactive override of k.
            answers = [eqs[k]]
            if VERBOSE:
                for i, text in enumerate(wps):
                    print(i, text)
                k = int(input())
            print(k)

            # First preprocessing, tokenize slightly: detach a trailing ',',
            # '.' or '?' from its word and pad with one space on each side so
            # the replacement keys can match the first and last word.
            problem = wps[k].lower().strip().split(" ")
            for i, x in enumerate(problem):
                if x and x[-1] in (',', '.', '?'):
                    problem[i] = x[:-1] + " " + x[-1]
            problem = " " + ' '.join(problem) + " "
            print(problem)

            for word, digit in replacements.items():
                problem = problem.replace(word, digit)

            # make story
            story = nlp.parse(problem)
            sets = makesets.makesets(story['sentences'])
            print(sets)

            # Collapse sets that share the same non-None idx: keep the first
            # one whose num contains a digit, otherwise the first duplicate.
            # `sets` shrinks while being walked, hence the index loop.
            i = 0
            while i < len(sets):
                dups = [y for y in sets if y[1].idx is not None]
                dups = [y for y in dups if y[1].idx == sets[i][1].idx]
                if len(dups) > 1:
                    good = [
                        y for y in dups
                        if any(ch.isdigit() for ch in y[1].num)
                    ]
                    if good:
                        others = [x for x in dups if x != good[0]]
                        for x in others:
                            sets.remove(x)
                    else:
                        # just pick 1
                        for x in dups[1:]:
                            sets.remove(x)
                i += 1

            # A problem without an unknown cannot be aligned to its equation.
            # TODO look for 2 xes
            if not any(x[1].num == 'x' for x in sets):
                problematic.write('no x :' + problem)
                continue

            numlist = [(cleannum(v.num), v) for _, v in sets]
            numlist = [x for x in numlist if x[0] != '']
            if VERBOSE:
                for _, v in numlist:
                    v.details()
                input()

            # Maps an operand token to (score, set); intermediate
            # sub-expressions are added under their "(a op b)" string.
            objs = {num: (0, v) for num, v in numlist}

            for j, eq in enumerate(answers):
                trips = []
                print(j, eq)
                left, right = [x.strip().split(' ') for x in eq.split('=')]

                target = ('x', objs['x'])

                # Reduce each side to a single token, innermost parentheses
                # first, otherwise the leftmost "a op b" triple.
                for compound in (left, right):
                    while len(compound) > 1:
                        if "(" in compound:
                            rpidx = ((len(compound) - 1)
                                     - compound[::-1].index('('))
                            lpidx = rpidx + compound[rpidx:].index(")")
                            subeq = compound[rpidx + 1:lpidx]
                            substr = "(" + ''.join(subeq) + ")"
                            compound = (compound[:rpidx] + [substr]
                                        + compound[lpidx + 1:])
                        else:
                            subeq = compound[0:3]
                            substr = "(" + ''.join(subeq) + ")"
                            compound = [substr] + compound[3:]
                        p, op, e = subeq
                        p = objs[p]
                        e = objs[e]
                        op = op.strip()
                        trips.append((op, p, e))
                        pute = (0, makesets.combine(p[1], e[1], op))
                        objs[substr] = pute
                        # NOTE(review): pute is always a 2-tuple here, so
                        # this comparison can never be true; kept unchanged
                        # until the intended failure check is confirmed.
                        if pute == -1:
                            exit()
                t = training(trips, problem, story, target)
                for op in t:
                    bigtexamples[op][0].extend(t[op][0])
                    bigtexamples[op][1].extend(t[op][1])
                # Reports the last operator bound above (loop variable reuse).
                print(op, len(bigtexamples[op][0]))
    if TRAIN:
        with open('data/' + OUT + ".local.training", 'wb') as dump_file:
            pickle.dump(bigtexamples, dump_file)
コード例 #12
0
ファイル: make_equations.py プロジェクト: MathProblems/may2
def make_eq(q, a, VERBOSE, TRAIN):
    """Search for equations matching each problem's answer; write or train.

    For each line of file ``q`` the text is normalised and parsed, sets are
    extracted, and ``Solver(values).solveEquations`` is asked for equations
    over the extracted numbers that yield the answer on the matching line of
    file ``a``.  The candidates are filtered by a few positional heuristics.
    When neither ``TRAIN`` nor ``VERBOSE`` is set they are written to
    ``q + ".out.txt"``.  When ``TRAIN`` is set one candidate is picked at
    random, reduced to ``(op, left, right)`` triples and passed to
    ``training``; the accumulated examples are pickled to
    ``'data/' + OUT + '.training'``.

    Problems that cannot be handled are logged to ``somethingWrongProblems``
    (opened in 'w' mode, so truncated on every call).

    Args:
        q: Path of a text file whose lines are the word problems.
        a: Path of a text file whose lines are the numeric answers.
        VERBOSE: When truthy, every iteration lists all problems, reads the
            index to process from standard input and pauses at several
            points.
        TRAIN: When truthy, build and pickle training examples.

    NOTE(review): none of the files opened here is closed explicitly.
    """
    bigtexamples = {x: ([], []) for x in ["+", "*", '/', '-', '=']}
    wps = open(q).readlines()
    answs = open(a).readlines()
    # The output file is only opened in non-interactive, non-training runs.
    if not TRAIN and not VERBOSE:
        out = open(q + ".out.txt", 'w')
    problematic = open('somethingWrongProblems', 'w')

    # Space-delimited number words and the digits that replace them.
    replacements = {
        ' two ': ' 2 ',
        " three ": ' 3 ',
        ' four ': ' 4 ',
        ' five ': ' 5 ',
        ' six ': ' 6 ',
        ' seven ': ' 7 ',
        ' eight ': ' 8 ',
        ' nine ': ' 9 ',
        ' ten ': ' 10 ',
        ' eleven ': ' 11 ',
        ' twice ': ' 2 '
    }

    for k in range(len(wps)):
        if VERBOSE:
            # Interactive mode: show the menu, then let the user override k.
            for i in range(len(wps)):
                print(i, wps[i])
            k = int(input())
        print(k)
        problem = wps[k].lower()
        #First preprocessing, tokenize slightly
        # Detach a trailing ',', '.' or '?' from its word, then pad with one
        # space on each side so the replacement keys can match edge words.
        problem = problem.strip().split(" ")
        for i, x in enumerate(problem):
            if len(x) == 0: continue
            if x[-1] in [',', '.', '?']:
                problem[i] = x[:-1] + " " + x[-1]
        problem = ' '.join(problem)
        problem = " " + problem + " "
        print(problem)

        for r in replacements:
            problem = problem.replace(r, replacements[r])
        # The bare string below is disabled code (it only applied the
        # replacements to the text before " how "); it has no runtime effect.
        '''
        if " how " in problem:
            left,right = problem.split(" how ")
        else: left = problem

        for r in replacements:
            left = left.replace(r,replacements[r])
        if " how " in problem:
            problem = left + ' how ' + right
        else:
            problem = left
        '''

        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        i = 0
        print(sets)
        # Collapse sets that share the same non-None idx: keep the first one
        # whose num contains a digit, otherwise the first duplicate.  `sets`
        # shrinks while being walked, hence the index-based loop.
        while i < len(sets):
            dups = [y for y in sets if y[1].idx != None]
            dups = [y for y in dups if y[1].idx == sets[i][1].idx]
            if len(dups) > 1:
                good = [
                    y for y in dups
                    if len([x for x in y[1].num if x.isdigit()]) > 0
                ]
                if good:
                    others = [x for x in dups if x != good[0]]
                    for x in others:
                        sets.remove(x)
                else:
                    # just pick 1
                    for x in dups[1:]:
                        sets.remove(x)
            i += 1

        # Positions of the sets whose quantity is the unknown 'x'.
        xidx = [i for i, x in enumerate(sets) if x[1].num == 'x']
        if not xidx:
            problematic.write('no x :' + problem)
            continue

        #TODO look for 2 xes
        xidx = xidx[0]
        # twoToRight is set when a set next to (or at) the unknown has the
        # entity 'dozen'; it later selects the "... = x op y" equation shape.
        twoToRight = False
        if xidx > 0:
            print(len(sets), xidx)
            if sets[xidx - 1][1].entity == 'dozen':
                # 2 vals to right
                twoToRight = True
        if len(sets) - xidx > 1:
            if sets[xidx + 1][1].entity == 'dozen':
                twoToRight = True
        if len(sets) - xidx < 3:
            if sets[xidx][1].entity == 'dozen':
                twoToRight = True

        # Pair each set with its cleaned number string; drop empty ones.
        numlist = [(cleannum(v.num), v) for k, v in sets]
        numlist = [x for x in numlist if x[0] != '']
        if VERBOSE:
            for z, v in numlist:
                v.details()
            input()

        # Not read again in this function.
        allnumbs = {str(k): v for k, v in numlist}

        # Maps an operand token to (0, set); intermediate sub-expressions are
        # added later under their "(a op b)" string.
        objs = {k: (0, v) for k, v in numlist}

        print('start solving')
        print(numlist)
        if len(numlist) < 2:
            problematic.write("not enough numbers : " + problem)
            continue

        values = [x[0] for x in numlist if x[0] != 'x']
        print(values)
        ST = Solver(values)

        answers = []
        answers = ST.solveEquations(float(answs[k]))
        print(answs[k])
        if not answers:
            problematic.write("No answers : " + problem + "\n")
            problematic.write(str([x[0] for x in numlist]) + '\n')
            problematic.write(answs[k] + '\n')
            continue
        print('done solving')

        # if target has 2 entities, try eqs with = x op y format
        simpleranswers = None
        if twoToRight:
            # NOTE(review): the bare except hides every error raised while
            # filtering (for example IndexError on short equations).
            try:
                simpleranswers = [
                    x for x in answers if x.split(" ")[-4] == "=" and (
                        x.split(" ")[-3] == 'x' or x.split(' ')[-1] == 'x')
                ]
            except:
                pass
        # Fallback: equations with '=' as the second or second-to-last token,
        # i.e. a single token on one side.
        if not simpleranswers:
            simpleranswers = [
                x for x in answers
                if x.split(" ")[1] == '=' or x.split(" ")[-2] == "="
            ]
        #simpleranswers = [x for x in answers if x.split(" ")[1] == '=' or x.split(" ")[-2]=="="]

        #filter out where = in middle if simpler eq exists
        if simpleranswers:
            print(answers)
            # Work on a copy so removals below leave simpleranswers intact.
            answers = simpleranswers[:]
        else:
            problematic.write("not simple : " + problem + "\n")
            continue

        values = [x[0] for x in numlist]
        xidx = values.index('x')
        print(simpleranswers)
        print(xidx)
        # Drop candidates in which 'x' does not sit at the same operand
        # position as in the problem's number list.
        # NOTE(review): the loop variable rebinds the parameter `a`; the
        # answers file has already been read at this point.
        for a in simpleranswers:
            aspl = [
                x for x in a.split(" ")
                if x not in ["/", "-", '+', '*', '=', '(', ')']
            ]
            print(a)
            print(aspl)
            print(values)
            aidx = aspl.index('x')
            print(aidx)
            if aidx != xidx:
                print("removing ", a)
                answers.remove(a)
        print(answers)
        # If the positional filter removed everything, fall back to all of
        # the simple candidates.
        if answers == []:
            answers = simpleranswers

        print(answers)
        if not VERBOSE:
            if not TRAIN:
                # One record per problem, terminated by a "___" line.
                out.write(problem + '\n')
                out.write(answs[k] + "\n")
                out.write(str([x[0] for x in numlist]))
                out.write("\n")
                for x in answers:
                    out.write(x + "\n")
                out.write("___\n")

        if VERBOSE:
            input()
        if not TRAIN:
            continue

        # Prefer equations whose second-to-last token is '='.
        if len([x for x in answers if x.split(" ")[-2] == "="]) > 0:
            answers = [x for x in answers if x.split(" ")[-2] == "="]

        # Train on a single, randomly chosen candidate.
        c = randint(0, len(answers) - 1)
        answers = [answers[c]]
        for j, eq in enumerate(answers):
            trips = []
            print(j, eq)
            l, r = [x.strip().split(' ') for x in eq.split('=')]

            # `simplex` is the single-token side, `compound` the expression.
            compound = r if len(l) == 1 else l
            simplex = l if len(l) == 1 else r
            target = simplex[0]
            target = (target, objs[target])

            #find innermost parens?
            # Reduce the expression to one token: innermost parentheses
            # first, otherwise the leftmost "a op b" triple.
            while len(compound) > 1:
                if "(" in compound:
                    rpidx = (len(compound) - 1) - compound[::-1].index('(')
                    lpidx = rpidx + compound[rpidx:].index(")")
                    subeq = compound[rpidx + 1:lpidx]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = compound[:rpidx] + [substr
                                                   ] + compound[lpidx + 1:]
                else:
                    subeq = compound[0:3]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = [substr] + compound[3:]
                if True:
                    p, op, e = subeq
                    #print(p,op,e)
                    p = objs[p]
                    e = objs[e]
                    op = op.strip()
                    trips.append((op, p, e))
                    pute = (0, makesets.combine(p[1], e[1], op))
                    #print("OPERATION SELECTED: ",op)
                    #p.details()
                    #e.details()
                    #print(substr,pute[1].num)
                    objs[substr] = pute
                # NOTE(review): pute is always a 2-tuple here, so this
                # comparison can never be true.
                if pute == -1:
                    exit()
            # Record the final '=' step with the operands in equation order.
            if simplex == l:
                trips.append(("=", objs[simplex[0]], objs[compound[0]]))
            else:
                trips.append(("=", objs[compound[0]], objs[simplex[0]]))
            t = training(trips, problem, target)
            for op in t:
                bigtexamples[op][0].extend(t[op][0])
                bigtexamples[op][1].extend(t[op][1])
            print(op, len(bigtexamples[op][0]))
    if TRAIN:
        pickle.dump(bigtexamples, open('data/' + OUT + ".training", 'wb'))
コード例 #13
0
def infer(q,a,VERBOSE):
    """Score candidate equations per problem, solve the best, tally accuracy.

    For each line of file ``q`` the text is normalised and parsed, sets are
    extracted, and every equation of ``StringTemplate(numidxlist, inf=True)``
    is scored by summing the scores ``compute`` returns for its sub-steps.
    Equations are tried in descending score order until one yields an
    accepted guess, which is compared with the answer on the matching line
    of file ``a``.  Totals are printed at the end; nothing is returned.

    Args:
        q: Path of a text file whose lines are the word problems.
        a: Path of a text file whose lines are the numeric answers.
        VERBOSE: When truthy, every iteration lists all problems, reads the
            index to process from standard input and pauses at several
            points.

    NOTE(review): none of the files opened here is closed explicitly, and
    ``problematic`` is not written to in this function.
    """
    wps = open(q).readlines()
    answs = open(a).readlines()
    problematic = open('somethingWrongProblems','a')

    # Per-operator tallies for '+', '-', '*', '/': index 0 counts operators
    # in correctly answered equations, index 1 in all guessed equations.
    ar = [0,0]
    sr = [0,0]
    mr = [0,0]
    dr = [0,0]

    replacements = {' two ':' 2 '," three ":' 3 ',' four ':' 4 ',' five ':' 5 ',' six ':' 6 ',' seven ':' 7 ',' eight ':' 8 ',' nine ':' 9 ',' ten ':' 10 ',' eleven ':' 11 ', ' twice ':' 2 '}
    right = 0
    guesses = 0
    ad = []
    wrong = []
    multiops = 0
    multiopsright = 0
    
    # Same mapping as above, assigned a second time.
    replacements = {' two ':' 2 '," three ":' 3 ',' four ':' 4 ',' five ':' 5 ',' six ':' 6 ',' seven ':' 7 ',' eight ':' 8 ',' nine ':' 9 ',' ten ':' 10 ',' eleven ':' 11 ', ' twice ':' 2 '}

    for k in range(len(wps)):
        if VERBOSE:
            # Interactive mode: show the menu, then let the user override k.
            for i in range(len(wps)):
                print(i,wps[i])
            k = int(input())
        print(k)
        problem = wps[k].lower()
        #First preprocessing, tokenize slightly
        # Detach a trailing ',', '.' or '?' from its word, then pad with one
        # space on each side so the replacement keys can match edge words.
        problem = problem.strip().split(" ")
        for i,x in enumerate(problem):
            if len(x)==0:continue
            if x[-1] in [',','.','?']:
                problem[i] = x[:-1]+" "+x[-1]
        problem = ' '.join(problem)
        problem = " " + problem + " "
        print(problem)

        for r in replacements:
            problem = problem.replace(r,replacements[r])

        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
            #REMOVE DUPS THIS IS BAD:
        # Keep only the first set for every distinct num; `sets` shrinks
        # while being walked, hence the index-based loop.
        i = 0
        while i < len(sets):
            x = sets[i]
            dups = [y for y in sets if y[1].num == x[1].num]
            if len(dups)>1:
                for x in dups[1:]:
                    sets.remove(x)
            i+=1

        print("Sets detected: ")
        for x in sets:
            x[1].details()
        # Pair each set with its cleaned number string; drop empty ones.
        numlist = [(cleannum(v.num),v) for k,v in sets]
        numlist = [x for x in numlist if x[0]!='']
        if VERBOSE:
            for z,v in numlist:
                v.details()
            input()

        # Not read again in this function.
        allnumbs = {str(k):v for k,v in numlist}
            

        objs = {k:(0,v) for k,v in numlist}


        # Numbers carrying a leading or trailing '*' or a trailing '/' marker
        # yield substring constraints that a candidate equation must contain;
        # the markers are then stripped from the number.
        constraints = []
        for i in range(len(numlist)):
            if numlist[i][0][-1] == "*":
                if i==0:continue
                constraints.append(numlist[i-1][0]+" * "+numlist[i][0][:-1])
                numlist[i] = (''.join([x for x in numlist[i][0] if x not in ['*','/']]),numlist[i][1])
                numlist[i][1].num = numlist[i][0]
            elif numlist[i][0][0] == "*":
                if i==0:continue
                numlist[i] = (''.join([x for x in numlist[i][0] if x not in ['*','/']]),numlist[i][1])
                # Swap with the previous entry before building the constraint.
                tmp = numlist[i-1]
                numlist[i-1]=numlist[i]
                numlist[i]=tmp
                constraints.append(numlist[i-1][0]+" * "+numlist[i][0][1:])
            elif numlist[i][0][-1] == "/":
                if i==0:continue
                constraints.append(" / "+numlist[i][0][:-1])
                numlist[i] = (''.join([x for x in numlist[i][0] if x not in ['*','/']]),numlist[i][1])
        # Rebuilt because the keys in numlist may have changed above.
        objs = {k:(0,v) for k,v in numlist}
        # Problems with fewer than two quantities or without an unknown are
        # counted as wrong without attempting a guess.
        if len(objs)<2:
            wrong.append(k)
            continue
        if 'x' not in objs:
            wrong.append(k)
            continue
        

        # True when every known number is integral; used below to reject
        # non-integer guesses.
        integerproblem = all([float(x[0]).is_integer() for x in numlist if x[0]!='x'])
        # More than three quantities is counted as a multi-operation problem.
        multi = False
        if len(objs)>3:
            multiops+=1
            multi = True
        if VERBOSE:
            print(objs,numlist,[v.num for k,v in sets])
        #print(allnumbs)


        state = []
        #print(numlist)

        

        #for e in allnumbs.items():
        #print(numlist)
        numidxlist = [x[0] for x in numlist]
        ST = StringTemplate(numidxlist, inf=True)
        # scores[j] is the score of ST.equations[j]; -0.2 marks rejection.
        scores = []
        for j,eq in enumerate(ST.equations):
            #print(j,eq.toString())
            # An equation is considered only if there are no constraints or
            # it contains at least one of them as a substring.
            good = False
            if len(constraints)==0:
                good = True
            else:
                for constraint in constraints:
                    if constraint in eq.toString():
                        good = True
            if not good:
                scores.append(-0.2)
                continue
            
                    
            thisscore = []
            #print(eq.toString())
            #determine score for this eq
            l,r = [x.strip().split(' ') for x in eq.toString().split('=')]
            #print(l,r)
            
            # Equations with an expression on both sides are rejected.
            if len(r)>1 and len(l)>1:
                scores.append(-0.2);continue
            if len(r)>1: 

                compound = r
                target = l[0]
            else:
                #print(constraints)
                compound = l
                target = r[0]
            target = (target,objs[target])

            #find innermost parens?
            # Reduce the expression to one token: innermost parentheses
            # first, otherwise the leftmost "a op b" triple.  Results are
            # cached in objs under the "(a op b)" string.
            while len(compound)>1:
                if "(" in compound:
                    rpidx = (len(compound) - 1) - compound[::-1].index('(')
                    lpidx = rpidx+compound[rpidx:].index(")")
                    subeq = compound[rpidx+1:lpidx]
                    substr = "("+''.join(subeq)+")"
                    compound = compound[:rpidx]+[substr]+compound[lpidx+1:]
                else:
                    subeq = compound[0:3]
                    substr = "("+''.join(subeq)+")"
                    compound = [substr]+compound[3:]
                if substr in objs:
                    pute = objs[substr]
                    #print(pute[0],pute[1].num)
                else:
                    p,op,e = subeq
                    #print(p,op,e)
                    p = objs[p][1]
                    e = objs[e][1]
                    op = op.strip()
                    pute = compute(p,op,e,target,problem)
                    #print("OPERATION SELECTED: ",op)
                    #p.details()
                    #e.details()
                    #print(substr,pute[1].num)
                    objs[substr]=pute
                # presumably compute() signals failure with -1 -- TODO confirm
                if pute == -1:
                    exit()
                score,c = pute
                thisscore.append(score)
            # Penalise a result whose entity differs from the target's.
            # NOTE(review): `c` is only bound inside the loop above; if the
            # loop body never ran, it is stale or undefined here.
            if target[1][1].entity != c.entity:
                thisscore.append(-0.2)
            #print("WAT",thisscore,c.ent,c.num)
            
            scores.append(sum(thisscore))

            #print(compound)
        # m is not read again in this function.
        m = np.argmax(scores)
        #print(scores[m],ST.equations[m].toString())
        srt = sorted([(x,i) for i,x in enumerate(scores)],reverse=True)
        print('\n Top scoring 3 equations: ')
        for x,i in srt[:3]:
            print(x,ST.equations[i].toString())

        # The bare string below is disabled code; it has no runtime effect.
        '''
        try:
            if target.ent=='dozen':
                guess = solve('('+numlist[0].num+'/12)'+"-"+target.num,'x')[0]
                print(numlist[0].num+"/12="+target.num)
            else:
                guess = solve(numlist[0].num+"-"+target.num,'x')[0]
                print(numlist[0].num+"="+target.num)
        '''
        # Equation indices ordered by descending score.
        eqidxs = [y[0] for y in sorted(enumerate(scores),key=lambda x:x[1],reverse=True)]
        # Not read again in this function.
        eqnidsx = [x[1] for x in srt]
        seen = []
        tright = 0
        for i in eqidxs:
            # Stop after the first accepted guess.
            if len(seen)>=1:break
            eq = ST.equations[i].toString()
            #eq = eq.replace("=",'-')
            # Rewrite "lhs = rhs" as "lhs - (rhs)" before passing it to solve.
            splitEquation = eq.split('=')
            eq = splitEquation[0] + '- (' + splitEquation[1] + ')'
            #print(scores[i], eq)
            # Any failure in solve is turned into the rejected guess -1.
            try:
                guess = solve(eq,'x')[0]
            except: guess = -1

            # This is the non-negative constraint
            # wrapped in a "check for complex number" try statement :/
            try:
                if guess < 0:
                    continue
            except:
                continue

            #this is a constraint agianst fractional answers when the problem is integers
            # NOTE(review): is_integer is read as an attribute, not called;
            # presumably guess is a solver object exposing it as a property
            # -- verify.
            if not guess.is_integer:
                if integerproblem:
                    continue

            if guess not in seen:
                seen.append(guess)
            else: 
                continue
            answ = float(answs[k])
            # Operator characters appearing in the guessed equation string.
            ops = [x for x in ST.equations[i].toString() if x in ['+','-','*','/']]
            if guess == answ: 
                print("\nCORRECT")
                tright=1
                ar[0] += ops.count('+')
                sr[0] += ops.count('-')
                mr[0] += ops.count('*')
                dr[0] += ops.count('/')
            else:
                print("\nINCORRECT")
            ar[1] += ops.count('+')
            sr[1] += ops.count('-')
            mr[1] += ops.count('*')
            dr[1] += ops.count('/')
            print("Guessed Equation : ",ST.equations[i].toString() )

            print("Guess : ",guess,"\nTrue Answer :", answ, '\n\n')
        guesses += len(seen)
        if tright==1:
            if multi:
                multiopsright += 1
            right +=1
        else:
            wrong.append(k)

        #break
        if VERBOSE: input()
        continue
    # Summary: correct/guessed counts, multi-operation counts, and the
    # per-operator tallies.
    print(right,guesses)
    print(multiops,multiopsright)
    print(ar,sr,mr,dr)
コード例 #14
0
def make_eq(q, a, equations):
    """Collect per-operator training examples from equations and pickle them.

    For every problem, the entries returned by ``get_k_eqs`` whose first
    field is ``1`` are kept if their equation string ends in ``= x``.  The
    problem text is lightly tokenized and parsed, and each equation is then
    reduced one binary sub-expression at a time; every reduction is recorded
    as an ``(operator, left, right)`` triple and combined into a new set with
    ``makesets.combine``.  The triples are handed to ``training`` and its
    per-operator output is accumulated.

    Args:
        q: Sequence of word-problem strings.
        a: Answers parallel to ``q``; kept for interface compatibility, not
            read by this function.
        equations: Sequence indexed like ``q``; ``equations[k]`` is passed to
            ``get_k_eqs``.

    Side effects:
        Prints progress to stdout and writes the accumulated examples to
        ``data/ixl.local.training`` using ``pickle``.
    """
    bigtexamples = {x: ([], []) for x in ["+", "*", '/', '-', '=']}
    wps = q
    symbols = ('(', ')', '+', '-', '/', '*', '=')

    for k in range(len(wps)):
        eqs = get_k_eqs(equations[k])
        answers = [x[1] for x in eqs if x[0] == 1]
        # Keep only equations of the form "<expr> = x".  Comparing the last
        # two tokens as a slice also rejects strings with fewer than two
        # tokens instead of raising IndexError.
        answers = [eq for eq in answers if eq.split()[-2:] == ['=', 'x']]
        if not answers:
            continue
        # De-duplicate while keeping first-seen order so that answers[0]
        # (used for the constant check below) is chosen deterministically.
        answers = list(dict.fromkeys(answers))

        # First preprocessing: split trailing ',', '.', '?' off each word.
        tokens = wps[k].strip().split(" ")
        for i, tok in enumerate(tokens):
            if tok and tok[-1] in (',', '.', '?'):
                tokens[i] = tok[:-1] + " " + tok[-1]
        problem = " " + ' '.join(tokens) + " "
        print(k)
        print(problem)

        # Make story
        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])

        if not any(s[1].num == 'x' for s in sets):
            print("NO X WHY")
            continue

        numlist = [(cleannum(v.num), v) for _, v in sets]
        numlist = [x for x in numlist if x[0] != '']
        objs = {num: (0, v) for num, v in numlist}
        print(objs.items())

        # Every constant of the first equation must map to a detected set.
        consts = [x for x in answers[0].split(" ") if x not in symbols]
        present = [x for x in consts if x in objs]
        if present != consts:
            print(present, consts)
            print("missing thing")
            continue

        for j, eq in enumerate(answers):
            trips = []
            print(j, eq)
            l, r = [x.strip().split(' ') for x in eq.split('=')]
            target = ('x', objs['x'])

            for compound in (l, r):
                # Collapse the token list until a single token remains,
                # innermost (right-most opening) parenthesis first.
                while len(compound) > 1:
                    if "(" in compound:
                        open_idx = (len(compound) - 1) - compound[::-1].index('(')
                        close_idx = open_idx + compound[open_idx:].index(")")
                        subeq = compound[open_idx + 1:close_idx]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = (compound[:open_idx] + [substr]
                                    + compound[close_idx + 1:])
                    else:
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    p, op, e = subeq
                    p = objs[p]
                    e = objs[e]
                    op = op.strip()
                    trips.append((op, p, e))
                    # Register the combined set under its textual form so
                    # enclosing expressions can look it up.
                    objs[substr] = (0, makesets.combine(p[1], e[1], op))

            t = training(trips, problem, story, target, sets)
            for op in t:
                bigtexamples[op][0].extend(t[op][0])
                bigtexamples[op][1].extend(t[op][1])

    # Context manager guarantees the pickle file is flushed and closed.
    with open('data/ixl.local.training', 'wb') as out:
        pickle.dump(bigtexamples, out)
コード例 #15
0
ファイル: newinference.py プロジェクト: MathProblems/may2
def make_eq(q, a, equations):
    """Score candidate equations for each problem and tally top-1 hits.

    For every problem the candidates from ``get_k_eqs`` are scored by
    multiplying the scores ``compute`` returns for each binary
    sub-expression and for the final ``=`` between both sides.  The
    candidates are then sorted by score, and the problem counts as right
    when the best one has ``1`` in its first field.

    Args:
        q: Sequence of word-problem strings.
        a: Answers parallel to ``q``; bound to ``answs`` but not read
            afterwards.
        equations: Sequence indexed like ``q``; ``equations[k]`` is passed to
            ``get_k_eqs``.

    Returns:
        Tuple ``(right, wrong)`` of problem counts.  Problems skipped with
        ``continue`` before scoring are counted in neither.
    """
    wps = q  #open(q).readlines()
    answs = a  #open(a).readlines()
    right = 0
    wrong = 0

    for k in range(len(wps)):
        answers = get_k_eqs(equations[k], g=True, a=True)
        if answers == []: continue
        # Keep only the first entry seen for each distinct equation string
        # (field 1 of every entry).
        seeneq = []
        seen = []
        for x in answers:
            if x[1] not in seeneq:
                seen.append(x)
                seeneq.append(x[1])
        answers = seen
        # NOTE(review): the set round-trip discards the order kept above, so
        # answers[0] (used for the constant check below) is not a stable
        # choice -- confirm this is intended.
        answers = list(set(answers))

        #First preprocessing, tokenize slightly
        # Splits a trailing ',', '.' or '?' off each word and pads the text
        # with one leading and one trailing space.
        problem = wps[k]  #.lower()
        problem = problem.strip().split(" ")
        for i, x in enumerate(problem):
            if len(x) == 0: continue
            if x[-1] in [',', '.', '?']:
                problem[i] = x[:-1] + " " + x[-1]
        problem = ' '.join(problem)
        problem = " " + problem + " "
        print(problem)

        #make story
        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        i = 0

        ######
        for x in sets:
            x[1].details()
        #continue

        # Indices of sets whose number is the unknown 'x'.
        xidx = [i for i, x in enumerate(sets) if x[1].num == 'x']
        if not xidx:
            print("NO X WHY")
            continue

        #TODO look for 2 xes
        xidx = xidx[0]

        # Map each cleaned number string to (0, set); empty strings dropped.
        numlist = [(cleannum(v.num), v) for k, v in sets]
        numlist = [x for x in numlist if x[0] != '']
        allnumbs = {str(k): v for k, v in numlist}
        objs = {k: (0, v) for k, v in numlist}
        print(objs.items())
        # Non-operator tokens of the first candidate equation.
        consts = [
            x for x in answers[0][1].split(" ") if x not in [
                '(',
                ')',
                '+',
                '-',
                '/',
                '*',
                '=',
            ]
        ]
        present = [x for x in consts if x in objs]
        # Skip the problem unless every constant maps to a detected set ...
        if consts != present:
            print(present, consts)
            print("missing thing")
            continue
        # ... and every detected number appears in the equation.
        if len([x for x in objs if x not in consts]) > 0:
            print("missing thing")
            continue
        scores = []

        for j, eq, cons, guess in answers:
            eqspl = eq.split(" = ")
            consts = [
                x for x in eq.split(" ") if x not in [
                    '(',
                    ')',
                    '+',
                    '-',
                    '/',
                    '*',
                    '=',
                ]
            ]
            # 1 only when the equation lists its constants in exactly the
            # order of numlist; all other candidates are skipped below.
            order = int(consts == [x[0] for x in numlist])
            '''
            if order == 0: 
                if eqspl[0].strip() == 'x' or eqspl[1].strip()=='x':
                    eq2 = eqspl[1] + " = " + eqspl[0]
                    consts = [x for x in eq2.split(" ") if x not in ['(',')','+','-','/','*','=',]]
                    order = int(consts==[x[0] for x in numlist])
                if order == 0: 
                    continue
            '''
            if order == 0: continue
            #j = randint(0,len(answers)-1)
            #eq = answers[j]
            trips = []
            #print(j,eq)
            l, r = [x.strip().split(' ') for x in eq.split('=')]
            # Constants of the first candidate, split at " = ": sp pairs the
            # last constant left of '=' with the first one right of it.
            # sp is not read again in this function.
            consts = " ".join([
                x for x in answers[0][1].split(" ") if x not in [
                    '(',
                    ')',
                    '+',
                    '-',
                    '/',
                    '*',
                ]
            ])
            consts = consts.split(" = ")
            sp = (objs[consts[0].split(" ")[-1]],
                  objs[consts[1].split(" ")[0]])

            target = 'x'
            target = (target, objs[target])

            #find innermost parens?
            sides = []
            thisscore = []
            for i, compound in enumerate([l, r]):
                # Collapse the token list to one token: the right-most '('
                # group first, otherwise the leading three tokens.
                while len(compound) > 1:
                    if "(" in compound:
                        rpidx = (len(compound) - 1) - compound[::-1].index('(')
                        lpidx = rpidx + compound[rpidx:].index(")")
                        subeq = compound[rpidx + 1:lpidx]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = compound[:rpidx] + [substr
                                                       ] + compound[lpidx + 1:]
                    else:
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    p, op, e = subeq
                    p = objs[p]
                    e = objs[e]
                    op = op.strip()
                    pute = compute(p, op, e, target, problem, story, order)
                    # Store the result under its textual form so enclosing
                    # expressions can look it up.
                    objs[substr] = pute
                    if pute == -1:
                        exit()
                    score, c, vals = pute
                    thisscore.append(score)
                sides.append(objs[compound[0]])
            p = sides[0]
            e = sides[1]
            # Equation score = product of all sub-expression scores times the
            # score of the final '=' between both sides.
            score = 1
            for s in thisscore:
                score *= s
            score *= compute(p, '=', e, target, problem, story, order, score,
                             cons)[0]
            scores.append((score, j, eq, guess))
        # Highest score first; ties fall back to the remaining tuple fields.
        scores = sorted(scores, reverse=True)
        # Top-3 entries whose first answer field (j) equals 1.
        righties = [x for x in scores[:3] if x[1] == 1]
        if not righties:
            wrong += 1
            print(scores[:3])
            print("TOP SCORING NO CORRECT SOLUTION \nINCORRECT")
            continue
        else:
            print(scores[:3])
            # corr is only referenced by the disabled block below.
            corr = righties[0][3]
        '''
        guessd = {}
        for x in scores[:3]:
            if x[3] not in guessd:
                guessd[x[3]]=x[0]
            else:
                guessd[x[3]]+=x[0]

        guessd = sorted(guessd.items(),key=lambda x: x[1],reverse=True)

        if guessd[0][0]==corr:
            right+=1
            print("CORRECT")
        else:
            wrong += 1
            print("INCORRECT")

        '''
        # NOTE(review): scores is always non-empty here (righties was
        # non-empty above), so the outer else branch cannot run.
        if len(scores) > 0:
            if scores[0][1] == 1:
                right += 1
                print("CORRECT")
            else:
                wrong += 1
                print("INCORRECT")
        else:
            wrong += 1
            print("INCORRECT")

    return (right, wrong)
コード例 #16
0
def score(problem):
    """Score every candidate equation generated for one word problem.

    The problem text is tokenized, passed through the ``replacements`` table
    and parsed into sets.  Candidate equations come from ``StringTemplate``
    over the detected numbers; each one is reduced sub-expression by
    sub-expression with ``compute`` and gets the mean of the collected
    scores.  ``replacements`` and ``VERBOSE`` are names not defined in this
    function.

    Args:
        problem: The word-problem text.

    Returns:
        ``-1`` when no ``'x'`` entry exists or fewer than two numbers were
        found; otherwise the tuple ``(tripeqs, scores, equalsmatch,
        contmatch, integerproblem, failurerate, fivescores)`` where the list
        members hold one entry per equation in ``tripeqs``.
    """

    # Split a trailing ',', '.' or '?' off each word and pad with spaces.
    problem = problem.strip().split(" ")
    for i, x in enumerate(problem):
        if len(x) == 0: continue
        if x[-1] in [',', '.', '?']:
            problem[i] = x[:-1] + " " + x[-1]
    problem = ' '.join(problem)
    problem = " " + problem + " "
    print(problem)

    for r in replacements:
        problem = problem.replace(r, replacements[r])

    story = nlp.parse(problem)
    sets = makesets.makesets(story['sentences'])

    # twoToRight is set when the set before x, the set after x, or (if fewer
    # than three sets remain from x onward) the x set itself has entity
    # 'dozen'.
    xidx = [i for i, x in enumerate(sets) if x[1].num == 'x']
    twoToRight = False
    if not xidx:
        print("AAAH~! NO X!")

    else:

        xidx = xidx[0]
        if xidx > 0:
            print(len(sets), xidx)
            if sets[xidx - 1][1].entity == 'dozen':
                # 2 vals to right
                twoToRight = True
        if len(sets) - xidx > 1:
            if sets[xidx + 1][1].entity == 'dozen':
                twoToRight = True
        if len(sets) - xidx < 3:
            if sets[xidx][1].entity == 'dozen':
                twoToRight = True
    print("Sets detected: ")
    for x in sets:
        x[1].details()
    # (cleaned number string, set) pairs; empty number strings are dropped.
    numlist = [(cleannum(v.num), v) for k, v in sets]
    numlist = [x for x in numlist if x[0] != '']
    if VERBOSE:
        for z, v in numlist:
            v.details()
        input()

    allnumbs = {str(k): v for k, v in numlist}

    objs = {k: (0, v) for k, v in numlist}

    # Strip '*' / '/' markers from number strings, recording a constraint
    # string for each.  The first entry (i == 0) is left untouched.
    # constraints is only read inside the disabled block further down.
    constraints = []
    for i in range(len(numlist)):
        if numlist[i][0][-1] == "*":
            # Trailing '*': constraint "<previous> * <this>", marker removed
            # from both the list entry and the set's num attribute.
            if i == 0: continue
            constraints.append(numlist[i - 1][0] + " * " + numlist[i][0][:-1])
            numlist[i] = (''.join([
                x for x in numlist[i][0] if x not in ['*', '/']
            ]), numlist[i][1])
            numlist[i][1].num = numlist[i][0]
        elif numlist[i][0][0] == "*":
            # Leading '*': strip the marker, swap this entry with the
            # previous one, then record the constraint from the swapped
            # entries.
            if i == 0: continue
            numlist[i] = (''.join([
                x for x in numlist[i][0] if x not in ['*', '/']
            ]), numlist[i][1])
            tmp = numlist[i - 1]
            numlist[i - 1] = numlist[i]
            numlist[i] = tmp
            constraints.append(numlist[i - 1][0] + " * " + numlist[i][0][1:])
        elif numlist[i][0][-1] == "/":
            # Trailing '/': constraint " / <this>", marker removed.
            if i == 0: continue
            constraints.append(" / " + numlist[i][0][:-1])
            numlist[i] = (''.join([
                x for x in numlist[i][0] if x not in ['*', '/']
            ]), numlist[i][1])
    # Rebuild the lookup now that the number strings have been cleaned.
    objs = {k: (0, v) for k, v in numlist}
    if 'x' not in objs:
        return -1
    if len(objs) < 2:
        return -1

    # True when every number other than 'x' has an integral float value.
    integerproblem = all(
        [float(x[0]).is_integer() for x in numlist if x[0] != 'x'])

    # With at most three numbers, one template covers them all; otherwise
    # build one template per non-x number with that value left out.
    numidxlist = [x[0] for x in numlist]
    tripeqs = []
    if len(numidxlist) <= 3:
        ST = StringTemplate(numidxlist, inf=True)
        tripeqs = ST.equations
    else:
        for x in numidxlist:
            if x != 'x':
                ST = StringTemplate([y for y in numidxlist if y != x],
                                    inf=True)
                tripeqs.extend(ST.equations)
    # Parallel result lists, one entry appended per candidate equation.
    scores = []
    equalsmatch = []
    contmatch = []
    failurerate = []
    fivescores = []
    for j, eq in enumerate(tripeqs):
        #print(j,eq.toString())
        good = False
        '''
        if len(constraints)==0:
            good = True
        else:
            for constraint in constraints:
                if constraint in eq.toString():
                    good = True
        if not good:
            scores.append(-0.2)
            continue
        '''

        thisscore = []
        # Up to five per-operation 'vals' results; unused slots stay as four
        # zeros.
        thisfivescore = [[0] * 4, [0] * 4, [0] * 4, [0] * 4, [0] * 4]
        fivei = 0
        #print(eq.toString())
        #determine score for this eq
        l, r = [x.strip().split(' ') for x in eq.toString().split('=')]
        #print(l,r)

        if twoToRight:
            # Reject equations where neither side has exactly three tokens.
            if len(r) != 3 and len(l) != 3:
                scores.append(-0.2)
                equalsmatch.append('x')
                contmatch.append('x')
                failurerate.append('x')
                fivescores.append(thisfivescore)
                continue
            # compound/target set here are overwritten below (target right
            # after this if/else, compound by the loop over [l, r]).
            if len(r) == 3:
                compound = r
                target = 'x'
            else:
                compound = l
                target = 'x'

        else:
            # Reject equations where both sides have more than one token.
            if len(r) > 1 and len(l) > 1:
                scores.append(-0.2)
                equalsmatch.append('x')
                contmatch.append('x')
                failurerate.append('x')
                fivescores.append(thisfivescore)
                continue
            '''
            if len(r)>1: 
                compound = r
                target = l[0]
            else:
                compound = l
                target = r[0]
            '''
        target = 'x'

        target = (target, objs[target])

        #find innermost parens?
        sides = []
        for compound in [l, r]:
            c = None
            # Collapse the token list to one token: the right-most '(' group
            # first, otherwise the leading three tokens.
            while len(compound) > 1:
                if "(" in compound:
                    rpidx = (len(compound) - 1) - compound[::-1].index('(')
                    lpidx = rpidx + compound[rpidx:].index(")")
                    subeq = compound[rpidx + 1:lpidx]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = compound[:rpidx] + [substr
                                                   ] + compound[lpidx + 1:]
                else:
                    subeq = compound[0:3]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = [substr] + compound[3:]
                # Reuse a result already stored for this exact sub-expression
                # (objs persists across equations within this call).
                if substr in objs:
                    pute = objs[substr]
                    #print(pute[0],pute[1].num)
                else:
                    p, op, e = subeq
                    #print(p,op,e)
                    p = objs[p][1]
                    e = objs[e][1]
                    op = op.strip()
                    pute = compute(p, op, e, target, problem, story)
                    #print("OPERATION SELECTED: ",op)
                    #p.details()
                    #e.details()
                    #print(substr,pute[1].num)
                    objs[substr] = pute
                if pute == -1:
                    exit()
                score, c, vals = pute
                thisscore.append(score)
                if fivei < 5:
                    thisfivescore[fivei] = vals
                    fivei += 1
            # Single-token side: take its (score, set) pair straight from
            # objs.
            if c == None:
                score, c = objs[compound[0]]
                thisscore.append(score)
            sides.append(c)
        # NOTE(review): this appends the most recent score a second time --
        # confirm that the extra weight is intended.
        thisscore.append(score)
        fivescores.append(thisfivescore)
        # 1 when both sides resolve to sets with the same entity.
        if sides[0].entity == sides[1].entity:
            #thisscore.append(-0.2)
            equalsmatch.append(1)
        else:
            equalsmatch.append(0)

        # Sum of type_failure over every entry currently in objs.
        failurerate.append(sum([objs[x][1].type_failure for x in objs]))

        # 1 when the x set's container differs from that of c, which at this
        # point is the result of the last side processed (r).
        if target[1][1].container != c.container:
            contmatch.append(1)
        else:
            contmatch.append(0)

        # thisscore always holds at least the entry appended above, so the
        # zero-length branch is not taken; the score is the plain mean.
        if len(thisscore) == 0:
            scores.append(0)
        else:
            scores.append(sum(thisscore) / float(len(thisscore)))

    return (tripeqs, scores, equalsmatch, contmatch, integerproblem,
            failurerate, fivescores)
コード例 #17
0
def make_eq(q, a, equations):
    """Build global (whole-equation) training rows and write them to disk.

    For every problem, each ``(j, eq)`` pair returned by ``get_k_eqs`` is
    reduced to one combined set per side of the ``=`` using
    ``makesets.combine``; both sides are then passed to ``training`` together
    with the parsed story, the ``x`` target, ``j``, an order flag and the
    pair of sets adjacent to ``=`` in the first equation.

    Args:
        q: Sequence of word-problem strings.
        a: Answers parallel to ``q``; kept for interface compatibility, not
            read by this function.
        equations: Sequence indexed like ``q``; ``equations[k]`` is passed to
            ``get_k_eqs``.

    Side effects:
        Prints progress to stdout and writes one line per training row to
        ``data/<c>.global.data``, where ``<c>`` is the last character of
        ``sys.argv[1]``.  Each line is the row's first value followed by
        ``index:value`` pairs numbered from 1.
    """
    tdata = []
    wps = q
    operators = ('(', ')', '+', '-', '/', '*')

    for k in range(len(wps)):
        answers = get_k_eqs(equations[k])
        if not answers:
            continue
        # De-duplicate while keeping first-seen order so that answers[0]
        # and the order of the emitted rows are deterministic.
        answers = list(dict.fromkeys(answers))

        # First preprocessing: split trailing ',', '.', '?' off each word.
        tokens = wps[k].strip().split(" ")
        for i, tok in enumerate(tokens):
            if tok and tok[-1] in (',', '.', '?'):
                tokens[i] = tok[:-1] + " " + tok[-1]
        problem = " " + ' '.join(tokens) + " "
        print(problem)

        # Make story
        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])

        # TODO look for 2 xes
        if not any(s[1].num == 'x' for s in sets):
            print("NO X WHY")
            continue

        numlist = [(cleannum(v.num), v) for _, v in sets]
        numlist = [x for x in numlist if x[0] != '']
        objs = {num: (0, v) for num, v in numlist}
        print(objs.items())

        # Every constant of the first equation must map to a detected set.
        first_tokens = [x for x in answers[0][1].split(" ")
                        if x not in operators]
        consts = [x for x in first_tokens if x != '=']
        present = [x for x in consts if x in objs]
        if consts != present:
            print(present, consts)
            print("missing thing")
            continue
        # 1 when the equation lists its constants in the text's order.
        order = int(consts == [x[0] for x in numlist])

        # Sets of the constants directly left and right of '=' in the first
        # equation.  Both depend only on answers[0], so compute them once.
        by_side = " ".join(first_tokens).split(" = ")
        sp = (objs[by_side[0].split(" ")[-1]][1],
              objs[by_side[1].split(" ")[0]][1])
        target = ('x', objs['x'])

        for j, eq in answers:
            print(j, eq)
            l, r = [x.strip().split(' ') for x in eq.split('=')]

            sides = []
            for compound in (l, r):
                # Collapse the token list until a single token remains,
                # innermost (right-most opening) parenthesis first.
                while len(compound) > 1:
                    if "(" in compound:
                        open_idx = (len(compound) - 1) - compound[::-1].index('(')
                        close_idx = open_idx + compound[open_idx:].index(")")
                        subeq = compound[open_idx + 1:close_idx]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = (compound[:open_idx] + [substr]
                                    + compound[close_idx + 1:])
                    else:
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    p, op, e = subeq
                    # Register the combined set under its textual form so
                    # enclosing expressions can look it up.
                    objs[substr] = (0, makesets.combine(objs[p][1],
                                                        objs[e][1],
                                                        op.strip()))
                sides.append(objs[compound[0]])
            tdata.append(
                training(sides[0], sides[1], problem, story, target, j, order,
                         sp))

    # Context manager guarantees the data file is flushed and closed.
    with open("data/" + sys.argv[1][-1] + ".global.data", 'w') as f:
        for v in tdata:
            fields = [str(v[0]) + " "]
            fields.extend(
                str(i) + ":" + str(val) + " "
                for i, val in enumerate(v[1:], 1))
            f.write("".join(fields) + "\n")
コード例 #18
0
def make_eq(q):
    """Collect per-operator training examples from gold equations.

    ``parse(q)`` supplies the problems and their equations.  Each problem is
    parsed into sets, sets sharing a non-``None`` ``idx`` are de-duplicated,
    and every equation is reduced one binary sub-expression at a time with
    ``makesets.combine``.  The recorded ``(operator, left, right)`` triples,
    plus a final ``=`` triple, go to ``training``, whose per-operator output
    is accumulated.

    Args:
        q: Passed unchanged to ``parse``.

    Side effects:
        Prints progress to stdout, writes a line to ``problematic`` (a name
        not defined in this function) for problems without an ``x`` set, and
        pickles the accumulated examples to ``data/gold_training.pickle``.
    """
    bigtexamples = {x: ([], []) for x in ["+", "*", '/', '-', '=']}
    wps, eqs = parse(q)

    for k in range(len(wps)):
        if len(wps[k]) == 0:
            continue
        problem = wps[k][0].lower()
        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        print(sets)

        # Drop sets that share an idx with another set, preferring to keep
        # one whose number contains a digit.  The list is edited in place
        # while being walked by index, exactly as before.
        i = 0
        while i < len(sets):
            dups = [
                y for y in sets
                if y[1].idx is not None and y[1].idx == sets[i][1].idx
            ]
            if len(dups) > 1:
                good = [
                    y for y in dups if any(ch.isdigit() for ch in y[1].num)
                ]
                if good:
                    extras = [x for x in dups if x != good[0]]
                else:
                    # just pick 1
                    extras = dups[1:]
                for x in extras:
                    sets.remove(x)
            i += 1

        # TODO look for 2 xes
        if not any(s[1].num == 'x' for s in sets):
            problematic.write('no x :' + problem)
            continue

        numlist = [(cleannum(v.num), v) for _, v in sets]
        numlist = [x for x in numlist if x[0] != '']
        objs = {num: (0, v) for num, v in numlist}
        answers = eqs[k]

        for j, eq in enumerate(answers):
            trips = []
            print(j, eq)
            l, r = [x.strip().split(' ') for x in eq.split('=')]

            # The single-token side is the target; the other side is reduced.
            compound = r if len(l) == 1 else l
            simplex = l if len(l) == 1 else r
            target = simplex[0]
            target = (target, objs[target])

            # Collapse the token list until a single token remains,
            # innermost (right-most opening) parenthesis first.
            while len(compound) > 1:
                if "(" in compound:
                    open_idx = (len(compound) - 1) - compound[::-1].index('(')
                    close_idx = open_idx + compound[open_idx:].index(")")
                    subeq = compound[open_idx + 1:close_idx]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = (compound[:open_idx] + [substr]
                                + compound[close_idx + 1:])
                else:
                    subeq = compound[0:3]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = [substr] + compound[3:]
                p, op, e = subeq
                p = objs[p]
                e = objs[e]
                op = op.strip()
                trips.append((op, p, e))
                # Register the combined set under its textual form so
                # enclosing expressions can look it up.
                objs[substr] = (0, makesets.combine(p[1], e[1], op))

            # Final '=' triple keeps the sides in their written order.
            if simplex == l:
                trips.append(("=", objs[simplex[0]], objs[compound[0]]))
            else:
                trips.append(("=", objs[compound[0]], objs[simplex[0]]))
            t = training(trips, problem, target)
            for op in t:
                bigtexamples[op][0].extend(t[op][0])
                bigtexamples[op][1].extend(t[op][1])
            # Reports the running count for whichever operator `op` was
            # bound to last.
            print(op, len(bigtexamples[op][0]))

    # Context manager guarantees the pickle file is flushed and closed.
    with open('data/gold_training.pickle', 'wb') as out:
        pickle.dump(bigtexamples, out)
コード例 #19
0
ファイル: dev_mktraindata.py プロジェクト: MathProblems/may2
def dotrain():
    """Derive per-operator training examples from problems and answers.

    Problems and answers are read line by line from ``sys.argv[1]`` and
    ``sys.argv[2]`` when command-line arguments are present, otherwise from
    ``emnlp_noIrrelev_p.txt`` / ``emnlp_noIrrelev_a.txt``.  For each problem
    a ``Solver`` built from the detected numbers proposes equations that
    yield the answer; only equations with ``=`` as second or second-to-last
    token are kept, preferring those where ``x`` sits at the same position
    among the constants as in the problem text.  Each kept equation is
    reduced one binary sub-expression at a time with ``makesets.combine`` and
    the resulting triples are passed to ``training``.

    Side effects:
        Prints progress to stdout, (re)creates ``nogoodtrainproblems`` with a
        note for every rejected problem, and pickles the accumulated examples
        to ``data/dev_training.pickle``.
    """
    if len(sys.argv) > 1:
        problems_path, answers_path = sys.argv[1], sys.argv[2]
    else:
        problems_path = "emnlp_noIrrelev_p.txt"
        answers_path = "emnlp_noIrrelev_a.txt"
    # Read both inputs up front and close the handles straight away.
    with open(problems_path) as fh:
        wps = fh.readlines()
    with open(answers_path) as fh:
        answs = fh.readlines()

    bigtexamples = {x: ([], []) for x in ["+", "*", '/', '-', '=']}
    replacements = {
        ' two ': ' 2 ',
        " three ": ' 3 ',
        ' four ': ' 4 ',
        ' five ': ' 5 ',
        ' six ': ' 6 ',
        ' seven ': ' 7 ',
        ' eight ': ' 8 ',
        ' nine ': ' 9 ',
        ' ten ': ' 10 ',
        ' eleven ': ' 11 ',
        ' week ': ' 7 days ',
        ' dozen ': ' 12 of ',
        ' dozens ': ' 12 ',
        ' twice ': ' 2 '
    }
    symbols = ('+', '-', '/', '=', ')', '(', '*')

    # The log of rejected problems is written throughout the loop; the
    # context manager makes sure it is flushed and closed even on error.
    with open('nogoodtrainproblems', 'w') as problematic:
        for k in range(len(wps)):
            print(k)
            problem = wps[k].lower()
            for r in replacements:
                problem = problem.replace(r, replacements[r])
            story = nlp.parse(problem)
            numbs = makesets.makesets(story['sentences'])

            numlist = [(cleannum(v.num), v) for _, v in numbs]
            numlist = [x for x in numlist if x[0] != '']

            allnumbs = {str(num): v for num, v in numlist}
            if 'x' not in allnumbs and 'x*' not in allnumbs:
                problematic.write('no x :' + problem)
                continue

            objs = {num: (0, v) for num, v in numlist}

            print('start solving')
            print(numlist)
            if len(numlist) < 2:
                problematic.write("not enough numbers : " + problem)
                continue

            ST = Solver([x[0] for x in numlist if x[0] != 'x'])
            answers = ST.solveEquations(float(answs[k]))
            print('done solving')
            # Filter out equations with '=' in the middle: keep those whose
            # second or second-to-last token is '='.
            simpleranswers = [
                x for x in answers
                if x.split(" ")[1] == '=' or x.split(" ")[-2] == "="
            ]
            if not answers:
                continue
            if not simpleranswers:
                print(answers)
                problematic.write("not simple : " + problem)
                continue
            answers = simpleranswers

            # Position of 'x' among the numbers of the text that occur in
            # the first equation.
            answervals = [
                x for x in answers[0].split(" ") if x not in symbols
            ]
            numvals = [x[0] for x in numlist if x[0] in answervals]
            xidx = numvals.index("x")
            # Prefer equations that place 'x' at that same position.
            xrightanswers = [
                eq for eq in answers
                if [z for z in eq.split(" ")
                    if z not in symbols].index('x') == xidx
            ]
            if xrightanswers:
                answers = xrightanswers

            for j, eq in enumerate(answers):
                trips = []
                print(j, eq)
                l, r = [x.strip().split(' ') for x in eq.split('=')]

                # The single-token side is the target, the other side is
                # reduced.  Selecting both from len(l) keeps them on
                # opposite sides even when both sides are single tokens
                # (previously both resolved to `l`, yielding "l = l").
                compound = r if len(l) == 1 else l
                simplex = l if len(l) == 1 else r
                target = simplex[0]
                target = (target, objs[target])

                # Collapse the token list until a single token remains,
                # innermost (right-most opening) parenthesis first.
                while len(compound) > 1:
                    if "(" in compound:
                        open_idx = ((len(compound) - 1)
                                    - compound[::-1].index('('))
                        close_idx = open_idx + compound[open_idx:].index(")")
                        subeq = compound[open_idx + 1:close_idx]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = (compound[:open_idx] + [substr]
                                    + compound[close_idx + 1:])
                    else:
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    p, op, e = subeq
                    p = objs[p]
                    e = objs[e]
                    op = op.strip()
                    trips.append((op, p, e))
                    # Register the combined set under its textual form so
                    # enclosing expressions can look it up.
                    objs[substr] = (0, makesets.combine(p[1], e[1], op))

                # Final '=' triple keeps the sides in their written order.
                if simplex == l:
                    trips.append(
                        ("=", objs[simplex[0]], objs[compound[0]]))
                else:
                    trips.append(
                        ("=", objs[compound[0]], objs[simplex[0]]))
                t = training(trips, problem, target)
                for op in t:
                    bigtexamples[op][0].extend(t[op][0])
                    bigtexamples[op][1].extend(t[op][1])
                # Reports the running count for whichever operator `op`
                # was bound to last.
                print(op, len(bigtexamples[op][0]))

    with open('data/dev_training.pickle', 'wb') as out:
        pickle.dump(bigtexamples, out)
コード例 #20
0
ファイル: train_global.py プロジェクト: MathProblems/may
def infer(q,a,cutoff,VERBOSE):
    """Generate training vectors for the global equation-ranking model.

    For every word problem, each candidate equation from ``StringTemplate``
    is scored by summing the ``compute`` scores of its sub-expressions.  The
    ``cutoff`` best-scoring candidates are solved for ``x`` and one feature
    vector per distinct solution is collected.  All vectors are finally
    written to ``data/single.global.data`` as ``label 1:v 2:v ...`` lines.

    Args:
        q: path of a text file with one word problem per line.
        a: path of a text file with one numeric answer per line (same order).
        cutoff: how many of the top-scoring candidate equations to solve.
        VERBOSE: when true, the problem index is read from stdin on every
            iteration and the function pauses after listing the number sets.
    """
    training = []
    with open(q) as qfile:
        wps = qfile.readlines()
    with open(a) as afile:
        answs = afile.readlines()
    # The original opened this log in append mode and never wrote to it.
    # Close the handle immediately; the file is still created if missing.
    open('somethingWrongProblems','a').close()

    replacements = {' two ':' 2 '," three ":' 3 ',' four ':' 4 ',' five ':' 5 ',' six ':' 6 ',' seven ':' 7 ',' eight ':' 8 ',' nine ':' 9 ',' ten ':' 10 ',' eleven ':' 11 ', ' twice ':' 2 '}

    for k in range(len(wps)):
        if VERBOSE:
            # Interactive mode: list all problems and let the user pick one.
            for i in range(len(wps)):
                print(i,wps[i])
            k = int(input())
        print(k)
        problem = wps[k].lower()
        # First preprocessing: split trailing punctuation off each word.
        problem = problem.strip().split(" ")
        for i,word in enumerate(problem):
            if len(word)==0:
                continue
            if word[-1] in [',','.','?']:
                problem[i] = word[:-1]+" "+word[-1]
        problem = ' '.join(problem)
        problem = " " + problem + " "
        print(problem)

        # Spelled-out numbers -> digits (keys are space-padded words).
        for r in replacements:
            problem = problem.replace(r,replacements[r])

        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        # De-duplicate by `num`, keeping the first set for each value.  The
        # original author flagged this as a known hack; the in-place removal
        # while indexing is reproduced unchanged.
        i = 0
        while i < len(sets):
            current = sets[i]
            dups = [y for y in sets if y[1].num == current[1].num]
            if len(dups)>1:
                for extra in dups[1:]:
                    sets.remove(extra)
            i+=1

        print("Sets detected: ")
        for entry in sets:
            entry[1].details()
        numlist = [(cleannum(v.num),v) for _,v in sets]
        numlist = [pair for pair in numlist if pair[0]!='']
        if VERBOSE:
            for _,v in numlist:
                v.details()
            input()

        # Normalise numbers carrying a '*' or '/' marker.  `constraints` is
        # still collected, but nothing in this function reads it any more.
        constraints = []
        for i in range(len(numlist)):
            if numlist[i][0][-1] == "*":
                if i==0:
                    continue
                constraints.append(numlist[i-1][0]+" * "+numlist[i][0][:-1])
                numlist[i] = (''.join([ch for ch in numlist[i][0] if ch not in ['*','/']]),numlist[i][1])
                numlist[i][1].num = numlist[i][0]
            elif numlist[i][0][0] == "*":
                if i==0:
                    continue
                numlist[i] = (''.join([ch for ch in numlist[i][0] if ch not in ['*','/']]),numlist[i][1])
                # Move the marked number in front of its predecessor.
                numlist[i-1],numlist[i] = numlist[i],numlist[i-1]
                constraints.append(numlist[i-1][0]+" * "+numlist[i][0][1:])
            elif numlist[i][0][-1] == "/":
                if i==0:
                    continue
                constraints.append(" / "+numlist[i][0][:-1])
                numlist[i] = (''.join([ch for ch in numlist[i][0] if ch not in ['*','/']]),numlist[i][1])

        # Token -> (score, set); sub-expression results are cached here too.
        objs = {num:(0,v) for num,v in numlist}
        if len(objs)<2 or 'x' not in objs:
            continue

        integerproblem = all([float(pair[0]).is_integer() for pair in numlist if pair[0]!='x'])
        if VERBOSE:
            print(objs,numlist,[v.num for _,v in sets])

        numidxlist = [pair[0] for pair in numlist]
        ST = StringTemplate(numidxlist, inf=True)
        scores = []
        equalsmatch = []
        contmatch = []
        for eq in ST.equations:
            thisscore = []
            l,r = [side.strip().split(' ') for side in eq.toString().split('=')]

            if len(r)>1 and len(l)>1:
                # Compound expressions on both sides are not scored; 'x'
                # marks the candidate as unusable for the second pass.
                scores.append(-0.2)
                equalsmatch.append('x')
                contmatch.append('x')
                continue
            if len(r)>1:
                compound = r
                target = l[0]
            else:
                compound = l
                target = r[0]
            target = (target,objs[target])

            # Repeatedly collapse the innermost parenthesised triple (or the
            # leading triple when no parentheses remain) into one token.
            while len(compound)>1:
                if "(" in compound:
                    rpidx = (len(compound) - 1) - compound[::-1].index('(')
                    lpidx = rpidx+compound[rpidx:].index(")")
                    subeq = compound[rpidx+1:lpidx]
                    substr = "("+''.join(subeq)+")"
                    compound = compound[:rpidx]+[substr]+compound[lpidx+1:]
                else:
                    subeq = compound[0:3]
                    substr = "("+''.join(subeq)+")"
                    compound = [substr]+compound[3:]
                if substr in objs:
                    pute = objs[substr]
                else:
                    p,op,e = subeq
                    p = objs[p][1]
                    e = objs[e][1]
                    op = op.strip()
                    pute = compute(p,op,e,target,problem)
                    objs[substr]=pute
                if pute == -1:
                    exit()
                score,c = pute
                thisscore.append(score)

            equalsmatch.append(1 if target[1][1].entity == c.entity else 0)
            # NOTE(review): this is 1 when the containers DIFFER; confirm
            # that the inverted sense is intended.
            contmatch.append(1 if target[1][1].container != c.container else 0)

            scores.append(sum(thisscore))

        srt = sorted([(s,i) for i,s in enumerate(scores)],reverse=True)
        print('\n Top scoring 3 equations: ')
        for s,i in srt[:3]:
            print(s,ST.equations[i].toString())

        eqidxs = [y[0] for y in sorted(enumerate(scores),key=lambda pair:pair[1],reverse=True)]
        seen = []
        for i in eqidxs[:cutoff]:
            ogeq = ST.equations[i].toString()
            if equalsmatch[i]=='x':
                continue
            # Rewrite "lhs = rhs" as "lhs - (rhs)" so it can be solved for 0.
            splitEquation = ogeq.split('=')
            eq = splitEquation[0] + '- (' + splitEquation[1] + ')'
            try:
                guess = solve(eq,'x')[0]
            except Exception:
                # Unsolvable or solution-less candidate: skip it.
                continue

            if guess in seen:
                continue
            seen.append(guess)

            # Comparing a non-real solution with 0 raises; skip such guesses.
            try:
                if guess < 0:
                    pass
            except TypeError:
                continue

            answ = float(answs[k])
            vec = []

            # Build the training vector; element 0 is the label.
            vec.append(1 if guess == answ else 0)

            vec.append(int(float(guess)<0))
            vec.append(int(integerproblem))
            vec.append(int(ogeq.index("=")==1))
            # 1 when the unknown is the last token of the candidate equation.
            # The original compared against a stale loop variable `x` (a
            # float score) on the rewritten equation, so it was always 0.
            vec.append(int(ogeq.strip().split(" ")[-1]=='x'))
            vec.append(equalsmatch[i])
            vec.append(contmatch[i])
            # NOTE(review): relies on `is_integer` being an attribute of the
            # object returned by `solve`, not a method; verify.
            vec.append(int(guess.is_integer))
            # Lexical cues
            vec.append(int("at first " in problem))
            vec.append(int("start " in problem))
            vec.append(int(" now " in problem))
            vec.append(int(" total " in problem))
            vec.append(int(" equally " in problem))
            vec.append(int(" equal " in problem))
            training.append(vec)

    with open("data/single.global.data",'w') as f:
        for v in training:
            f.write(str(v[0])+" ")
            for idx,val in enumerate(v[1:], start=1):
                f.write(str(idx)+":"+str(val)+" ")
            f.write("\n")
コード例 #21
0
ファイル: newinference.py プロジェクト: rootcanal/August
def make_eq(q, a, equations):
    """Score candidate equations per problem and count top-1 hits.

    For each problem the candidates from ``utils.get_k_eqs`` are
    de-duplicated, both sides of every candidate are reduced one operation
    at a time through ``compute``, and the product of all operation scores
    (including the final ``=``) becomes the candidate's score.

    Args:
        q: sequence of word-problem strings.
        a: unused here; kept for interface compatibility.
        equations: per-problem identifiers, passed to ``utils.get_k_eqs`` and
            (as ``int``) to ``utils.read_parse``.

    Returns:
        ``(right, wrong)``: number of problems whose best-scoring candidate
        carries the label 1, and number of scored problems where it does not.
        Problems skipped early (no candidates, no ``x``, mismatching
        constants) are counted in neither.
    """
    operators = ['(', ')', '+', '-', '/', '*', '=']
    wps = q  # open(q).readlines()
    right = 0
    wrong = 0

    for k in range(len(wps)):
        answers = utils.get_k_eqs(equations[k], g=True, a=True)
        if not answers:
            continue
        # Keep the first candidate for every distinct equation string.
        seen_eqs = set()
        unique = []
        for cand in answers:
            if cand[1] not in seen_eqs:
                unique.append(cand)
                seen_eqs.add(cand[1])
        answers = list(set(unique))

        # First preprocessing, tokenize slightly
        problem = utils.preprocess_problem(wps[k])
        print(problem)

        # Load the stored parse instead of re-parsing the problem text.
        story = utils.read_parse(int(equations[k]))
        sets = makesets.makesets(story['sentences'])

        for entry in sets:
            entry[1].details()

        if not any(entry[1].num == 'x' for entry in sets):
            print("NO X WHY")
            continue

        numlist = [(utils.cleannum(v.num), v) for _, v in sets]
        numlist = [pair for pair in numlist if pair[0] != '']
        # Token -> (score, set); sub-expression results are cached here too.
        objs = {num: (0, v) for num, v in numlist}
        print(objs.items())

        # The constants of the first candidate must match the detected
        # numbers exactly, in both directions.
        consts = [tok for tok in answers[0][1].split(" ")
                  if tok not in operators]
        present = [tok for tok in consts if tok in objs]
        if consts != present:
            print(present, consts)
            print("missing thing")
            continue
        if any(tok not in consts for tok in objs):
            print("missing thing")
            continue
        scores = []

        for j, eq, cons, guess in answers:
            consts = [tok for tok in eq.split(" ") if tok not in operators]
            # Only candidates that use the numbers in text order are scored.
            order = int(consts == [pair[0] for pair in numlist])
            if order == 0:
                continue
            l, r = [side.strip().split(' ') for side in eq.split('=')]
            target = ('x', objs['x'])

            sides = []
            thisscore = []
            for compound in (l, r):
                # Collapse the innermost parenthesised triple (or the leading
                # triple when no parentheses remain) until one token is left.
                while len(compound) > 1:
                    if "(" in compound:
                        rpidx = (len(compound) - 1) - compound[::-1].index('(')
                        lpidx = rpidx + compound[rpidx:].index(")")
                        subeq = compound[rpidx + 1:lpidx]
                        substr = "(" + ''.join(subeq) + ")"
                        # Build the new token list in ONE expression.  The
                        # original truncated `compound` first and then sliced
                        # the truncated list, which dropped every token after
                        # the closing parenthesis.
                        compound = (compound[:rpidx] + [substr]
                                    + compound[lpidx + 1:])
                    else:
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    p, op, e = subeq
                    p = objs[p]
                    e = objs[e]
                    op = op.strip()
                    pute = compute(p, op, e, target, problem, story, order)
                    objs[substr] = pute
                    if pute == -1:
                        exit()
                    score, c, vals = pute
                    thisscore.append(score)
                sides.append(objs[compound[0]])

            lhs, rhs = sides
            score = 1
            for s in thisscore:
                score *= s
            score *= compute(
                lhs, '=', rhs, target, problem, story, order, score, cons
            )[0]
            scores.append((score, j, eq, guess))

        scores = sorted(scores, reverse=True)
        print(scores[:3])
        # The second tuple field is the candidate's label; 1 means correct.
        if not any(entry[1] == 1 for entry in scores):
            wrong += 1
            print("TOP SCORING NO CORRECT SOLUTION \nINCORRECT")
            continue

        # `scores` is non-empty here because it holds a correct candidate.
        if scores[0][1] == 1:
            right += 1
            print("CORRECT")
        else:
            wrong += 1
            print("INCORRECT")

    return (right, wrong)
コード例 #22
0
ファイル: gettopeq.py プロジェクト: MathProblems/may2
def make_eq(q, a, VERBOSE, TRAIN):
    """Pick one answer-consistent equation per problem into ``topeq.txt``.

    Each problem is normalised, parsed into number sets, and handed to
    ``Solver`` to enumerate equations that yield the known answer.  The
    candidates are narrowed to "simple" forms (``=`` next to one end), then
    to those whose ``x`` position matches the text order, and one survivor
    is chosen at random and written as ``"<k> : <equation>"``.

    Args:
        q: sequence of word-problem strings.
        a: sequence of answers (strings convertible with ``float``).
        VERBOSE: when true, the problem index is read from stdin on every
            iteration and the function pauses after listing the number sets.
        TRAIN: when false (and VERBOSE is false) the surviving candidates are
            also dumped to ``whatever.out.txt``.

    Skipped problems are noted in ``somethingWrongProblems``.  All output
    files are closed when the function returns or raises.
    """
    from contextlib import ExitStack

    wps = q  #open(q).readlines()
    answs = a  #open(a).readlines()

    replacements = {
        ' two ': ' 2 ',
        " three ": ' 3 ',
        ' four ': ' 4 ',
        ' five ': ' 5 ',
        ' six ': ' 6 ',
        ' seven ': ' 7 ',
        ' eight ': ' 8 ',
        ' nine ': ' 9 ',
        ' ten ': ' 10 ',
        ' eleven ': ' 11 ',
        ' twice ': ' 2 '
    }
    operators = ["/", "-", '+', '*', '=', '(', ')']

    with ExitStack() as files:
        out = None
        if not TRAIN and not VERBOSE:
            out = files.enter_context(open("whatever.out.txt", 'w'))
        problematic = files.enter_context(
            open('somethingWrongProblems', 'w'))
        topeq = files.enter_context(open("topeq.txt", 'w'))

        for k in range(len(wps)):
            if VERBOSE:
                # Interactive mode: list all problems, let the user pick one.
                for i in range(len(wps)):
                    print(i, wps[i])
                k = int(input())
            print(k)
            problem = wps[k].lower()
            # First preprocessing: split trailing punctuation off each word.
            problem = problem.strip().split(" ")
            for i, word in enumerate(problem):
                if len(word) == 0:
                    continue
                if word[-1] in [',', '.', '?']:
                    problem[i] = word[:-1] + " " + word[-1]
            problem = ' '.join(problem)
            problem = " " + problem + " "
            print(problem)

            for r in replacements:
                problem = problem.replace(r, replacements[r])

            story = nlp.parse(problem)
            sets = makesets.makesets(story['sentences'])
            print(sets)
            # Collapse sets sharing the same `idx`, preferring the first one
            # whose `num` contains a digit.
            i = 0
            while i < len(sets):
                dups = [y for y in sets if y[1].idx is not None]
                dups = [y for y in dups if y[1].idx == sets[i][1].idx]
                if len(dups) > 1:
                    good = [
                        y for y in dups
                        if any(ch.isdigit() for ch in y[1].num)
                    ]
                    if good:
                        others = [y for y in dups if y != good[0]]
                    else:
                        # just pick 1
                        others = dups[1:]
                    for y in others:
                        sets.remove(y)
                i += 1

            xidx = [i for i, s in enumerate(sets) if s[1].num == 'x']
            if not xidx:
                problematic.write('no x :' + problem)
                continue

            #TODO look for 2 xes
            xidx = xidx[0]
            # Heuristic: a 'dozen'/'bill' entity next to the unknown means
            # the equation should have two values on x's side of '='.
            twoToRight = False
            if xidx > 0:
                print(len(sets), xidx)
                if sets[xidx - 1][1].entity in ['dozen', 'bill']:
                    twoToRight = True
            if len(sets) - xidx > 1:
                if sets[xidx + 1][1].entity == 'dozen':
                    twoToRight = True
            if len(sets) - xidx < 3:
                if sets[xidx][1].entity == 'dozen':
                    twoToRight = True

            numlist = [(cleannum(v.num), v) for _, v in sets]
            numlist = [pair for pair in numlist if pair[0] != '']
            if VERBOSE:
                for _, v in numlist:
                    v.details()
                input()

            print('start solving')
            print(numlist)
            if len(numlist) < 2:
                problematic.write("not enough numbers : " + problem)
                continue

            values = [pair[0] for pair in numlist if pair[0] != 'x']
            print(values)
            ST = Solver(values)

            answers = ST.solveEquations(float(answs[k]))
            print(answs[k])
            if not answers:
                problematic.write("No answers : " + problem + "\n")
                problematic.write(str([pair[0] for pair in numlist]) + '\n')
                problematic.write(answs[k] + '\n')
                continue
            print('done solving')

            # if target has 2 entities, try eqs with = x op y format
            simpleranswers = None
            if twoToRight:
                try:
                    simpleranswers = [
                        eq for eq in answers
                        if eq.split(" ")[-4] == "=" and (
                            eq.split(" ")[-3] == 'x'
                            or eq.split(' ')[-1] == 'x')
                    ]
                except IndexError:
                    # An equation with fewer than four tokens: fall back to
                    # the generic filter below.
                    pass
            if not simpleranswers:
                simpleranswers = [
                    eq for eq in answers
                    if eq.split(" ")[1] == '=' or eq.split(" ")[-2] == "="
                ]

            #filter out where = in middle if simpler eq exists
            if simpleranswers:
                print(answers)
                answers = simpleranswers[:]
            else:
                problematic.write("not simple : " + problem + "\n")
                continue

            # Keep candidates whose x sits at the same position among the
            # operands as it does among the numbers detected in the text.
            values = [pair[0] for pair in numlist]
            xidx = values.index('x')
            print(simpleranswers)
            print(xidx)
            for cand in simpleranswers:
                aspl = [tok for tok in cand.split(" ")
                        if tok not in operators]
                print(cand)
                print(aspl)
                print(values)
                aidx = aspl.index('x')
                print(aidx)
                if aidx != xidx:
                    print("removing ", cand)
                    answers.remove(cand)
            print(answers)
            if answers == []:
                # Nothing matched positionally: fall back to all simple ones.
                answers = simpleranswers

            print(answers)
            if out is not None:
                out.write(problem + '\n')
                out.write(answs[k] + "\n")
                out.write(str([pair[0] for pair in numlist]))
                out.write("\n")
                for eq in answers:
                    out.write(eq + "\n")
                out.write("___\n")

            # Prefer equations with a single token on the right-hand side.
            single_rhs = [eq for eq in answers if eq.split(" ")[-2] == "="]
            if single_rhs:
                answers = single_rhs

            c = randint(0, len(answers) - 1)
            answers = [answers[c]]
            topeq.write(str(k) + " : " + str(answers[0]) + "\n")
コード例 #23
0
ファイル: dev_mktraindata.py プロジェクト: MathProblems/may
def dotrain():
    """Build per-operator training examples and pickle them.

    Problems and answers are read from ``sys.argv[1]`` / ``sys.argv[2]`` when
    command-line arguments are given, otherwise from the bundled
    ``emnlp_noIrrelev_*`` files.  For each problem, every answer-consistent
    equation kept by the filters below is decomposed into
    ``(operator, left, right)`` triples, which ``training`` turns into
    examples.  The accumulated examples are written to
    ``data/dev_training.pickle``; skipped problems are noted in
    ``nogoodtrainproblems``.
    """
    if len(sys.argv)>1:
        qpath, apath = sys.argv[1], sys.argv[2]
    else:
        qpath, apath = "emnlp_noIrrelev_p.txt", "emnlp_noIrrelev_a.txt"
    with open(qpath) as qfile:
        wps = qfile.readlines()
    with open(apath) as afile:
        answs = afile.readlines()

    operators = ['+','-','/','=',')','(','*']
    bigtexamples = {x:([],[]) for x in ["+","*",'/','-','=']}
    replacements = {' two ':' 2 '," three ":' 3 ',' four ':' 4 ',' five ':' 5 ',' six ':' 6 ',' seven ':' 7 ',' eight ':' 8 ',' nine ':' 9 ',' ten ':' 10 ',' eleven ':' 11 ',' week ':' 7 days ',' dozen ':' 12 of ', ' dozens ': ' 12 ', ' twice ':' 2 '}

    with open('nogoodtrainproblems','w') as problematic:
        for k in range(len(wps)):
            print(k)
            problem = wps[k].lower()
            for r in replacements:
                problem = problem.replace(r,replacements[r])
            story = nlp.parse(problem)
            numbs = makesets.makesets(story['sentences'])

            numlist = [(cleannum(v.num),v) for _,v in numbs]
            numlist = [pair for pair in numlist if pair[0]!='']

            allnumbs = {str(num):v for num,v in numlist}
            if 'x' not in allnumbs and 'x*' not in allnumbs:
                problematic.write('no x :'+problem)
                continue

            # Token -> (score, set); sub-expression results are cached here.
            objs = {num:(0,v) for num,v in numlist}

            print('start solving')
            print(numlist)
            if len(numlist)<2:
                problematic.write("not enough numbers : "+problem)
                continue

            ST = Solver([pair[0] for pair in numlist if pair[0]!='x'])
            answers = ST.solveEquations(float(answs[k]))
            print('done solving')
            # Check for emptiness before indexing into the equations.
            if not answers:
                continue
            #filter out where = in middle if simpler eq exists
            simpleranswers = [eq for eq in answers if eq.split(" ")[1] == '=' or eq.split(" ")[-2]=="="]
            if simpleranswers:
                answers = simpleranswers
            else:
                print(answers)
                problematic.write("not simple : "+problem)
                continue

            # Prefer equations whose x sits at the same operand position as
            # in the text.
            # NOTE(review): `.index("x")` raises ValueError when only 'x*'
            # was detected; confirm whether that case can reach this point.
            answervals = [tok for tok in answers[0].split(" ") if tok not in operators]
            numvals = [pair[0] for pair in numlist if pair[0] in answervals]
            xidx = numvals.index("x")
            xrightanswers = [
                eq for eq in answers
                if [tok for tok in eq.split(" ") if tok not in operators].index('x')==xidx
            ]
            if xrightanswers:
                answers = xrightanswers

            for j,eq in enumerate(answers):
                trips = []
                print(j,eq)
                l,r = [side.strip().split(' ') for side in eq.split('=')]

                compound = l if len(r)==1 else r
                simplex = l if len(l)==1 else r
                target = simplex[0]
                target = (target,objs[target])

                # Collapse the innermost parenthesised triple (or the leading
                # triple when no parentheses remain) until one token is left.
                while len(compound)>1:
                    if "(" in compound:
                        rpidx = (len(compound) - 1) - compound[::-1].index('(')
                        lpidx = rpidx+compound[rpidx:].index(")")
                        subeq = compound[rpidx+1:lpidx]
                        substr = "("+''.join(subeq)+")"
                        compound = compound[:rpidx]+[substr]+compound[lpidx+1:]
                    else:
                        subeq = compound[0:3]
                        substr = "("+''.join(subeq)+")"
                        compound = [substr]+compound[3:]
                    p,op,e = subeq
                    p = objs[p]
                    e = objs[e]
                    op = op.strip()
                    trips.append((op,p,e))
                    # Cache the combined set so enclosing expressions can
                    # refer to this sub-expression by its string.
                    objs[substr] = (0,makesets.combine(p[1],e[1],op))
                # The '=' triple keeps the sides in their written order.
                if simplex == l:
                    trips.append(("=",objs[simplex[0]],objs[compound[0]]))
                else:
                    trips.append(("=",objs[compound[0]],objs[simplex[0]]))
                t = training(trips,problem,target)
                for op in t:
                    bigtexamples[op][0].extend(t[op][0])
                    bigtexamples[op][1].extend(t[op][1])
                print(op,len(bigtexamples[op][0]))

    with open('data/dev_training.pickle','wb') as f:
        pickle.dump(bigtexamples,f)
コード例 #24
0
ファイル: train_local.py プロジェクト: rootcanal/August
def make_eq(q, a, equations):
    """Collect per-operator training examples from the gold equations.

    For every problem, the candidates labelled 1 by ``utils.get_k_eqs`` are
    broken down into ``(operator, left, right)`` triples, passed to
    ``training``, and the resulting examples are accumulated per operator.
    The accumulated dict is pickled to
    ``data/<last char of sys.argv[1]>.local.training``.

    ``a`` is not used by this function.
    """
    non_operands = ['(', ')', '+', '-', '/', '*', '=']
    bigtexamples = {sym: ([], []) for sym in ["+", "*", '/', '-', '=']}
    wps = q  # open(q).readlines()

    for k, raw_problem in enumerate(wps):
        # First preprocessing, tokenize slightly
        problem = utils.preprocess_problem(raw_problem)
        print(k)
        print(problem)

        story = utils.read_parse(int(equations[k]))
        candidates = utils.get_k_eqs(equations[k])
        answers = [cand[1] for cand in candidates if cand[0] == 1]
        if not answers:
            continue
        answers = list(set(answers))
        print(story["sentences"][0]["text"])
        print(answers)

        # Build the number sets from the stored parse.
        sets = makesets.makesets(story['sentences'])

        has_unknown = False
        for entry in sets:
            if entry[1].num == 'x':
                has_unknown = True
        if not has_unknown:
            print("NO X WHY")
            continue

        numlist = []
        for _, v in sets:
            cleaned = utils.cleannum(v.num)
            if cleaned != '':
                numlist.append((cleaned, v))
        objs = {num: (0, v) for num, v in numlist}
        print(objs.items())

        # Every operand of the first gold equation must be a detected number.
        consts = [
            tok for tok in answers[0].split(" ") if tok not in non_operands
        ]
        present = [tok for tok in consts if tok in objs]
        if present != consts:
            print(present, consts)
            print("missing thing")
            exit()

        for j, eq in enumerate(answers):
            trips = []
            print(j, eq)
            l, r = [side.strip().split(' ') for side in eq.split('=')]
            target = ('x', objs['x'])

            # Reduce each side: collapse the innermost parenthesised group
            # (or the leading triple) into a single token until one remains.
            for compound in (l, r):
                while len(compound) > 1:
                    if "(" in compound:
                        open_at = max(
                            pos for pos, tok in enumerate(compound)
                            if tok == '('
                        )
                        close_at = compound.index(")", open_at)
                        subeq = compound[open_at + 1:close_at]
                        substr = "(" + ''.join(subeq) + ")"
                        head = compound[:open_at]
                        tail = compound[close_at + 1:]
                        compound = head + [substr] + tail
                    else:
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    left_tok, op, right_tok = subeq
                    left = objs[left_tok]
                    right = objs[right_tok]
                    op = op.strip()
                    trips.append((op, left, right))
                    # Register the combined set under the sub-expression
                    # string so enclosing expressions can look it up.
                    objs[substr] = (0, makesets.combine(left[1], right[1], op))

            t = training(trips, problem, story, target)
            for sym in t:
                bigtexamples[sym][0].extend(t[sym][0])
                bigtexamples[sym][1].extend(t[sym][1])

    out_path = 'data/' + sys.argv[1][-1] + ".local.training"
    with open(out_path, 'wb') as f:
        pickle.dump(bigtexamples, f)
コード例 #25
0
ファイル: make_equations.py プロジェクト: MathProblems/may
def make_eq(q,a,VERBOSE,TRAIN):
    """Enumerate answer-consistent equations and optionally build examples.

    Each problem is normalised, parsed into number sets, and handed to
    ``Solver`` to enumerate equations that yield the known answer.  The
    candidates are narrowed to "simple" forms (``=`` next to one end) and
    then to those whose ``x`` position matches the text order.

    Args:
        q: path of a text file with one word problem per line.
        a: path of a text file with one numeric answer per line (same order).
        VERBOSE: when true, the problem index is read from stdin on every
            iteration and the function pauses for input at several points.
        TRAIN: when true, one surviving equation per problem is chosen at
            random, decomposed into operator triples for ``training``, and
            the accumulated examples are pickled to
            ``'data/' + OUT + '.training'``.  When false (and VERBOSE is
            false) the surviving candidates are written to ``q + '.out.txt'``.

    Skipped problems are noted in ``somethingWrongProblems``.  All files are
    closed when the function returns or raises.
    """
    from contextlib import ExitStack

    bigtexamples = {x:([],[]) for x in ["+","*",'/','-','=']}
    with open(q) as qfile:
        wps = qfile.readlines()
    with open(a) as afile:
        answs = afile.readlines()

    replacements = {' two ':' 2 '," three ":' 3 ',' four ':' 4 ',' five ':' 5 ',' six ':' 6 ',' seven ':' 7 ',' eight ':' 8 ',' nine ':' 9 ',' ten ':' 10 ',' eleven ':' 11 ', ' twice ':' 2 '}
    operators = ["/","-",'+','*','=','(',')']

    with ExitStack() as files:
        out = None
        if not TRAIN and not VERBOSE:
            out = files.enter_context(open(q+".out.txt",'w'))
        problematic = files.enter_context(open('somethingWrongProblems','w'))

        for k in range(len(wps)):
            if VERBOSE:
                # Interactive mode: list all problems, let the user pick one.
                for i in range(len(wps)):
                    print(i,wps[i])
                k = int(input())
            print(k)
            problem = wps[k].lower()
            # First preprocessing: split trailing punctuation off each word.
            problem = problem.strip().split(" ")
            for i,word in enumerate(problem):
                if len(word)==0:
                    continue
                if word[-1] in [',','.','?']:
                    problem[i] = word[:-1]+" "+word[-1]
            problem = ' '.join(problem)
            problem = " " + problem + " "
            print(problem)

            for r in replacements:
                problem = problem.replace(r,replacements[r])

            story = nlp.parse(problem)
            sets = makesets.makesets(story['sentences'])
            print(sets)
            # Collapse sets sharing the same `idx`, preferring the first one
            # whose `num` contains a digit.
            i = 0
            while i < len(sets):
                dups = [y for y in sets if y[1].idx is not None]
                dups = [y for y in dups if y[1].idx == sets[i][1].idx]
                if len(dups)>1:
                    good = [y for y in dups if any(ch.isdigit() for ch in y[1].num)]
                    if good:
                        others = [y for y in dups if y!=good[0]]
                    else:
                        # just pick 1
                        others = dups[1:]
                    for y in others:
                        sets.remove(y)
                i+=1

            if not [s for s in sets if s[1].num=='x']:
                problematic.write('no x :'+problem)
                continue

            #TODO look for 2 xes

            numlist = [(cleannum(v.num),v) for _,v in sets]
            numlist = [pair for pair in numlist if pair[0]!='']
            if VERBOSE:
                for _,v in numlist:
                    v.details()
                input()

            # Token -> (score, set); sub-expression results are cached here.
            objs = {num:(0,v) for num,v in numlist}

            print('start solving')
            print(numlist)
            if len(numlist)<2:
                problematic.write("not enough numbers : "+problem)
                continue

            values = [pair[0] for pair in numlist if pair[0]!='x']
            print(values)
            ST = Solver(values)

            answers = ST.solveEquations(float(answs[k]))
            if not answers:
                problematic.write("No answers : " + problem + "\n")
                problematic.write(str([pair[0] for pair in numlist])+'\n')
                problematic.write(answs[k]+'\n')
                continue
            print('done solving')

            simpleranswers = [eq for eq in answers if eq.split(" ")[1] == '=' or eq.split(" ")[-2]=="="]

            #filter out where = in middle if simpler eq exists
            if simpleranswers:
                answers = simpleranswers[:]
            else:
                problematic.write("not simple : "+problem+"\n")
                continue

            # Keep candidates whose x sits at the same position among the
            # operands as it does among the numbers detected in the text.
            values = [pair[0] for pair in numlist]
            xidx = values.index('x')
            print(xidx)
            for cand in simpleranswers:
                aspl = [tok for tok in cand.split(" ") if tok not in operators]
                print(cand)
                print(aspl)
                print(values)
                aidx = aspl.index('x')
                print(aidx)
                if aidx != xidx:
                    print("removing ",cand)
                    answers.remove(cand)
            print(answers)
            if answers==[]:
                # Nothing matched positionally: fall back to all simple ones.
                answers = simpleranswers

            print(answers)
            if out is not None:
                out.write(problem + '\n')
                out.write(answs[k] + "\n")
                out.write(str([pair[0] for pair in numlist]))
                out.write("\n")
                for eq in answers:
                    out.write(eq + "\n")
                out.write("___\n")

            if VERBOSE:
                input()
            if not TRAIN:
                continue

            # Train on a single randomly chosen equation per problem.
            c = randint(0,len(answers)-1)
            answers = [answers[c]]
            for j,eq in enumerate(answers):
                trips = []
                print(j,eq)
                l,r = [side.strip().split(' ') for side in eq.split('=')]

                compound = r if len(l)==1 else l
                simplex = l if len(l)==1 else r
                target = simplex[0]
                target = (target,objs[target])

                # Collapse the innermost parenthesised triple (or the leading
                # triple when no parentheses remain) until one token is left.
                while len(compound)>1:
                    if "(" in compound:
                        rpidx = (len(compound) - 1) - compound[::-1].index('(')
                        lpidx = rpidx+compound[rpidx:].index(")")
                        subeq = compound[rpidx+1:lpidx]
                        substr = "("+''.join(subeq)+")"
                        compound = compound[:rpidx]+[substr]+compound[lpidx+1:]
                    else:
                        subeq = compound[0:3]
                        substr = "("+''.join(subeq)+")"
                        compound = [substr]+compound[3:]
                    p,op,e = subeq
                    p = objs[p]
                    e = objs[e]
                    op = op.strip()
                    trips.append((op,p,e))
                    # Cache the combined set so enclosing expressions can
                    # refer to this sub-expression by its string.
                    objs[substr] = (0,makesets.combine(p[1],e[1],op))
                # The '=' triple keeps the sides in their written order.
                if simplex == l:
                    trips.append(("=",objs[simplex[0]],objs[compound[0]]))
                else:
                    trips.append(("=",objs[compound[0]],objs[simplex[0]]))
                t = training(trips,problem,target)
                for op in t:
                    bigtexamples[op][0].extend(t[op][0])
                    bigtexamples[op][1].extend(t[op][1])
                print(op,len(bigtexamples[op][0]))

    if TRAIN:
        with open('data/'+OUT+".training",'wb') as f:
            pickle.dump(bigtexamples,f)
コード例 #26
0
def make_eq(q, a, equations):
    """Generate global-model training rows and write them to a data file.

    For each word problem, candidate equations are fetched with
    ``utils.get_k_eqs``.  Every binary sub-expression of a candidate is
    scored with ``compute`` (innermost parenthesised group first), and the
    two fully reduced sides are handed to ``training``.  The collected rows
    are written to ``data/<c>.global.data``, where ``<c>`` is the last
    character of ``sys.argv[1]``, one row per line in the form
    ``<row[0]> 1:<row[1]> 2:<row[2]> ...``.

    Args:
        q: Sequence of word-problem strings.
        a: Not used by this function; kept so the signature is unchanged.
        equations: Sequence parallel to ``q``.  Item ``k`` is passed to
            ``utils.get_k_eqs`` and, converted with ``int``, to
            ``utils.read_parse``.

    Returns:
        None.  The result is the file written at the end.
    """
    # Tokens of an equation string that are structure, not operands.
    non_operands = ('(', ')', '+', '-', '/', '*', '=')
    tdata = []
    wps = q

    for k in range(len(wps)):
        print(k, equations[k])
        answers = utils.get_k_eqs(equations[k], g=True)
        good = list(set([x for x in answers if x[0] == 1]))
        # Keep at most as many label-0 candidates as label-1 candidates.
        bad = list(set([x for x in answers if x[0] == 0]))[:len(good)]
        answers = good + bad
        if not answers:
            continue
        answers = list(set(answers))

        # First preprocessing, tokenize slightly
        problem = utils.preprocess_problem(wps[k])
        print(problem)

        # The parse is read back through utils.read_parse, not produced here.
        story = utils.read_parse(int(equations[k]))
        sets = makesets.makesets(story['sentences'])

        # A problem without an unknown cannot be turned into training data.
        if not [entry for entry in sets if entry[1].num == 'x']:
            print("NO X WHY")
            continue

        numlist = [(utils.cleannum(v.num), v) for _, v in sets]
        numlist = [pair for pair in numlist if pair[0] != '']
        objs = {num: (0, v) for num, v in numlist}
        # Operand strings in numlist order; loop-invariant, so built once.
        numlist_order = [pair[0] for pair in numlist]

        consts = [t for t in answers[0][1].split(" ") if t not in non_operands]
        present = [t for t in consts if t in objs]
        if present != consts:
            print(present, consts)
            print("missing thing")
            continue

        for j, eq, cons in answers:
            consts = [t for t in eq.split(" ") if t not in non_operands]
            # Only equations whose operands appear in exactly the numlist
            # order are used; everything else is skipped.
            order = int(consts == numlist_order)
            if order == 0:
                continue
            print(j, eq)
            l, r = [side.strip().split(' ') for side in eq.split('=')]
            target = ('x', objs['x'])

            sides = []
            thisscore = []
            for compound in (l, r):
                while len(compound) > 1:
                    if "(" in compound:
                        # The last "(" and the first ")" after it delimit an
                        # innermost group.
                        rpidx = (len(compound) - 1) - compound[::-1].index('(')
                        lpidx = rpidx + compound[rpidx:].index(")")
                        subeq = compound[rpidx + 1:lpidx]
                        substr = "(" + ''.join(subeq) + ")"
                        # Must be ONE expression: the tail has to be sliced
                        # from the old list.  Assigning the head first and
                        # then doing `compound += compound[lpidx + 1:]`
                        # slices the already-shortened list and drops every
                        # token after the closing parenthesis.
                        compound = (compound[:rpidx] + [substr] +
                                    compound[lpidx + 1:])
                    else:
                        # No parentheses left: fold the leftmost triple.
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    p, op, e = subeq
                    left = objs[p]
                    right = objs[e]
                    op = op.strip()
                    pute = compute(left, op, right, target, problem, story,
                                   order)
                    # Register the reduced group so later steps can look it
                    # up by its string form.
                    objs[substr] = pute
                    if pute == -1:
                        # -1 is treated as a fatal result from compute().
                        exit()
                    score, _, _ = pute
                    thisscore.append(score)
                sides.append(objs[compound[0]])

            # Overall score is the product of all sub-expression scores.
            score = 1
            for s in thisscore:
                score *= s
            tdata.append(
                training(sides[0], sides[1], problem, story, target, j, order,
                         score, cons))

    with open("data/" + sys.argv[1][-1] + ".global.data", 'w') as f:
        for row in tdata:
            f.write(str(row[0]) + " ")
            for idx, value in enumerate(row[1:]):
                f.write(str(idx + 1) + ":" + str(value) + " ")
            f.write("\n")
コード例 #27
0
def make_eq(q, a, equations):
    """Score candidate equations for each problem and count top-ranked hits.

    Candidates come from ``javad_train_local.get_k_eqs``.  Only those whose
    equation string has ``=`` as its second-to-last token and ``x`` as its
    last or third-to-last token are kept.  Each surviving candidate is
    reduced sub-expression by sub-expression with ``compute``; the product
    of those scores is multiplied by the score of the final ``=`` step.
    A problem counts as right when the best-scoring candidate has label 1.

    Args:
        q: Sequence of word-problem strings.
        a: Not used by this function.
        equations: Sequence parallel to ``q``; item ``k`` is passed to
            ``javad_train_local.get_k_eqs``.

    Returns:
        Tuple ``(right, wrong)`` of problem counts.
    """
    skip_tokens = ['(', ')', '+', '-', '/', '*', '=', 'x']
    right = 0
    wrong = 0

    for k, raw_problem in enumerate(q):
        candidates = []
        for cand in javad_train_local.get_k_eqs(equations[k], g=True, a=True):
            toks = cand[1].split()
            if toks[-2] != '=':
                continue
            if toks[-1] == 'x' or toks[-3] == 'x':
                candidates.append(cand)
        if not candidates:
            continue

        # Keep the first candidate for every distinct equation string, then
        # collapse exact duplicates.
        seen_eqs = set()
        unique = []
        for cand in candidates:
            if cand[1] in seen_eqs:
                continue
            seen_eqs.add(cand[1])
            unique.append(cand)
        answers = list(set(unique))

        # Light tokenisation: detach trailing ',', '.' or '?' from a word.
        words = raw_problem.strip().split(" ")
        for pos, word in enumerate(words):
            if word and word[-1] in (',', '.', '?'):
                words[pos] = word[:-1] + " " + word[-1]
        problem = " " + ' '.join(words) + " "
        print(problem)

        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        for entry in sets:
            entry[1].details()

        if not [entry for entry in sets if entry[1].num == 'x']:
            print("NO X WHY")
            continue

        numlist = [(cleannum(v.num), v) for _, v in sets]
        numlist = [pair for pair in numlist if pair[0] != '']
        objs = {num: (0, v) for num, v in numlist}
        print(objs.items())

        first_eq = answers[0][1]
        consts = [t for t in first_eq.split(" ") if t not in skip_tokens]
        present = [t for t in consts if t in objs]
        if consts != present:
            print(present, consts)
            print("missing thing")
            continue

        scores = []
        for j, eq, cons, guess in answers:
            eq_consts = [t for t in eq.split(" ") if t not in skip_tokens]
            in_numlist_order = [
                pair[0] for pair in numlist if pair[0] in eq_consts
            ]
            order = int(eq_consts == in_numlist_order)
            if order == 0:
                continue

            l, r = [side.strip().split(' ') for side in eq.split('=')]

            # Operands adjacent to '=' in the first candidate.  The value is
            # not read afterwards; the lookups are retained as in the
            # original code.
            halves = " ".join(
                t for t in first_eq.split(" ")
                if t not in ['(', ')', '+', '-', '/', '*']
            ).split(" = ")
            sp = (objs[halves[0].split(" ")[-1]],
                  objs[halves[1].split(" ")[0]])

            target = ('x', objs['x'])

            sides = []
            thisscore = []
            for compound in (l, r):
                while len(compound) > 1:
                    if "(" in compound:
                        # Last "(" and first ")" after it: innermost group.
                        rpidx = (len(compound) - 1) - compound[::-1].index('(')
                        lpidx = rpidx + compound[rpidx:].index(")")
                        subeq = compound[rpidx + 1:lpidx]
                        substr = "(" + ''.join(subeq) + ")"
                        head = compound[:rpidx]
                        tail = compound[lpidx + 1:]
                        compound = head + [substr] + tail
                    else:
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    p, op, e = subeq
                    left = objs[p]
                    right_operand = objs[e]
                    op = op.strip()
                    pute = compute(left, op, right_operand, target, problem,
                                   story, order)
                    objs[substr] = pute
                    if pute == -1:
                        exit()
                    step_score, _, _ = pute
                    thisscore.append(step_score)
                sides.append(objs[compound[0]])

            partial = 1
            for s in thisscore:
                partial *= s
            eq_step = compute(sides[0], '=', sides[1], target, problem, story,
                              order, partial, cons)[0]
            scores.append((partial * eq_step, j, eq, guess))

        scores.sort(reverse=True)
        top3 = scores[:3]
        if not [entry for entry in top3 if entry[1] == 1]:
            wrong += 1
            print(top3)
            print("TOP SCORING NO CORRECT SOLUTION \nINCORRECT")
            continue
        print(top3)

        # top3 holds a label-1 entry here, so scores cannot be empty.
        if scores[0][1] == 1:
            right += 1
            print("CORRECT")
        else:
            wrong += 1
            print("INCORRECT")

    return (right, wrong)
コード例 #28
0
ファイル: inference.py プロジェクト: MathProblems/may2
    for k in range(len(wps)):
        if VERBOSE:
            for i in range(len(wps)):
                print(i,wps[i])
            k = int(input())
        print(wps[k])
        problem = wps[k].lower()
        for r in replacements:
            problem = problem.replace(r,replacements[r])
        #extract numbers:
        problem = ' '.join([x.replace(",","") for x in problem.split()])



        story = nlp.parse(problem)
        numbs = makesets.makesets(story['sentences'])
        numlist = [(cleannum(v.num),v) for k,v in numbs]
        numlist = [x for x in numlist if x[0]!='']

        allnumbs = {str(k):v for k,v in numlist}
        for v,x in numlist:
            x.details()
        constraints = []
        for i in range(len(numlist)):
            if numlist[i][0][-1] == "*":
                if i==0:continue
                constraints.append(numlist[i-1][0]+" * "+numlist[i][0][:-1])
                numlist[i] = (''.join([x for x in numlist[i][0] if x not in ['*','/']]),numlist[i][1])
                numlist[i][1].num = numlist[i][0]
            elif numlist[i][0][0] == "*":
                if i==0:continue
コード例 #29
0
ファイル: train_local.py プロジェクト: rootcanal/August
def make_eq(q, a, equations):
    """Collect per-operator training examples and pickle them.

    For every problem the label-1 equations from ``utils.get_k_eqs`` are
    reduced one binary sub-expression at a time.  Each reduction step is
    recorded as an ``(op, left, right)`` triple and its combined set (from
    ``makesets.combine``) is registered under the sub-expression's string.
    The triples are passed to ``training``; the two lists it returns per
    operator are appended to the running totals.  The totals are pickled to
    ``data/<c>.local.training``, ``<c>`` being the last character of
    ``sys.argv[1]``.

    Args:
        q: Sequence of word-problem strings.
        a: Not used by this function.
        equations: Sequence parallel to ``q``; item ``k`` is passed to
            ``utils.get_k_eqs`` and, as ``int``, to ``utils.read_parse``.
    """
    structural = ('(', ')', '+', '-', '/', '*', '=')
    bigtexamples = {sym: ([], []) for sym in ["+", "*", '/', '-', '=']}

    for k, raw_problem in enumerate(q):
        # First preprocessing, tokenize slightly
        problem = utils.preprocess_problem(raw_problem)
        print(k)
        print(problem)

        story = utils.read_parse(int(equations[k]))
        answers = [
            cand[1] for cand in utils.get_k_eqs(equations[k]) if cand[0] == 1
        ]
        if not answers:
            continue
        answers = list(set(answers))
        print(story["sentences"][0]["text"])
        print(answers)

        sets = makesets.makesets(story['sentences'])
        if not [entry for entry in sets if entry[1].num == 'x']:
            print("NO X WHY")
            continue

        numlist = [(utils.cleannum(v.num), v) for _, v in sets]
        numlist = [pair for pair in numlist if pair[0] != '']
        objs = {num: (0, v) for num, v in numlist}
        print(objs.items())

        consts = [t for t in answers[0].split(" ") if t not in structural]
        present = [t for t in consts if t in objs]
        if present != consts:
            # An operand of the first equation has no matching set: abort.
            print(present, consts)
            print("missing thing")
            exit()

        for j, eq in enumerate(answers):
            print(j, eq)
            lhs, rhs = [side.strip().split(' ') for side in eq.split('=')]
            target = ('x', objs['x'])

            trips = []
            for compound in (lhs, rhs):
                while len(compound) > 1:
                    if "(" in compound:
                        # Last "(" and first ")" after it: innermost group.
                        rpidx = (len(compound) - 1) - compound[::-1].index('(')
                        lpidx = rpidx + compound[rpidx:].index(")")
                        subeq = compound[rpidx + 1:lpidx]
                        substr = "(" + ''.join(subeq) + ")"
                        head = compound[:rpidx]
                        tail = compound[lpidx + 1:]
                        compound = head + [substr] + tail
                    else:
                        # No parentheses left: fold the leftmost triple.
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]

                    p, op, e = subeq
                    left = objs[p]
                    right = objs[e]
                    op = op.strip()
                    trips.append((op, left, right))
                    objs[substr] = (0, makesets.combine(left[1], right[1], op))

            t = training(trips, problem, story, target)
            for op in t:
                positives, negatives = bigtexamples[op]
                positives.extend(t[op][0])
                negatives.extend(t[op][1])

    out_path = 'data/' + sys.argv[1][-1] + ".local.training"
    with open(out_path, 'wb') as f:
        pickle.dump(bigtexamples, f)
コード例 #30
0
def infer(q, a, VERBOSE):
    """Guess an equation for every word problem and print accuracy counters.

    Each problem line is lower-cased, lightly tokenised, has number words
    replaced by digits, and is parsed with ``nlp.parse``.  Candidate
    equations come from ``StringTemplate``; each is scored by averaging the
    ``compute`` scores of its binary sub-expressions.  Candidates are then
    tried in descending score order until one yields an acceptable solution
    for ``x``, which is compared with the expected answer.

    Args:
        q: Path of a text file with one word problem per line.
        a: Path of a text file with one answer per line; the line matching
            the problem index is parsed with ``float``.
        VERBOSE: When truthy, all problems are listed and the index to
            process is read from stdin on every iteration; extra details
            are printed and the loop pauses on ``input()``.

    Returns:
        None.  Counters are printed at the end.
    """
    # Read both inputs through context managers so the handles are closed.
    with open(q) as question_file:
        wps = question_file.readlines()
    with open(a) as answer_file:
        answs = answer_file.readlines()
    # Nothing is written to this file here.  It is still opened in append
    # mode, as before, so it exists afterwards; the handle is closed at once
    # instead of being leaked.
    open('somethingWrongProblems', 'a').close()

    # Per-operator counters: [count in correct guesses, count in all guesses].
    ar = [0, 0]
    sr = [0, 0]
    mr = [0, 0]
    dr = [0, 0]

    replacements = {
        ' two ': ' 2 ',
        " three ": ' 3 ',
        ' four ': ' 4 ',
        ' five ': ' 5 ',
        ' six ': ' 6 ',
        ' seven ': ' 7 ',
        ' eight ': ' 8 ',
        ' nine ': ' 9 ',
        ' ten ': ' 10 ',
        ' eleven ': ' 11 ',
        ' twice ': ' 2 '
    }
    right = 0
    guesses = 0
    wrong = []
    multiops = 0
    multiopsright = 0

    def strip_ops(text):
        # Remove the '*' and '/' marker characters from a number string.
        return ''.join([ch for ch in text if ch not in ['*', '/']])

    for k in range(len(wps)):
        if VERBOSE:
            for i in range(len(wps)):
                print(i, wps[i])
            k = int(input())
        print(k)
        problem = wps[k].lower()
        # First preprocessing, tokenize slightly: detach trailing ',', '.'
        # or '?' from a word.
        words = problem.strip().split(" ")
        for pos, word in enumerate(words):
            if word and word[-1] in (',', '.', '?'):
                words[pos] = word[:-1] + " " + word[-1]
        problem = " " + ' '.join(words) + " "
        print(problem)

        for r in replacements:
            problem = problem.replace(r, replacements[r])

        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])
        # REMOVE DUPS THIS IS BAD: drop every later set whose .num repeats
        # an earlier one.
        i = 0
        while i < len(sets):
            current = sets[i]
            dups = [y for y in sets if y[1].num == current[1].num]
            if len(dups) > 1:
                for extra in dups[1:]:
                    sets.remove(extra)
            i += 1

        print("Sets detected: ")
        for entry in sets:
            entry[1].details()
        numlist = [(cleannum(v.num), v) for _, v in sets]
        numlist = [pair for pair in numlist if pair[0] != '']
        if VERBOSE:
            for _, v in numlist:
                v.details()
            input()

        # Numbers carrying a '*' or '/' marker become substring constraints
        # that a candidate equation must contain; the marker is then removed.
        constraints = []
        for i in range(len(numlist)):
            num = numlist[i][0]
            if num[-1] == "*":
                if i == 0:
                    continue
                constraints.append(numlist[i - 1][0] + " * " + num[:-1])
                numlist[i] = (strip_ops(num), numlist[i][1])
                numlist[i][1].num = numlist[i][0]
            elif num[0] == "*":
                if i == 0:
                    continue
                numlist[i] = (strip_ops(num), numlist[i][1])
                numlist[i - 1], numlist[i] = numlist[i], numlist[i - 1]
                # NOTE(review): after the swap, [1:] drops the first
                # character of what was the previous number; kept as in the
                # original, confirm this is intended.
                constraints.append(numlist[i - 1][0] + " * " +
                                   numlist[i][0][1:])
            elif num[-1] == "/":
                if i == 0:
                    continue
                constraints.append(" / " + num[:-1])
                numlist[i] = (strip_ops(num), numlist[i][1])

        objs = {num: (0, v) for num, v in numlist}
        if len(objs) < 2:
            wrong.append(k)
            continue
        if 'x' not in objs:
            wrong.append(k)
            continue

        integerproblem = all(
            [float(pair[0]).is_integer() for pair in numlist
             if pair[0] != 'x'])
        if VERBOSE:
            print(objs, numlist, [v.num for _, v in sets])

        numidxlist = [pair[0] for pair in numlist]
        ST = StringTemplate(numidxlist, inf=True)
        scores = []
        for j, eq in enumerate(ST.equations):
            eq_text = eq.toString()
            # With constraints present, at least one must occur verbatim.
            good = (not constraints or
                    any(constraint in eq_text for constraint in constraints))
            if not good:
                scores.append(-0.2)
                continue

            thisscore = []
            # determine score for this eq
            l, r = [side.strip().split(' ') for side in eq_text.split('=')]

            # Equations with a compound expression on both sides are
            # penalised and not evaluated.
            if len(r) > 1 and len(l) > 1:
                scores.append(-0.2)
                continue
            if len(r) > 1:
                compound = r
                target = l[0]
            else:
                compound = l
                target = r[0]
            target = (target, objs[target])

            # find innermost parens?
            while len(compound) > 1:
                if "(" in compound:
                    rpidx = (len(compound) - 1) - compound[::-1].index('(')
                    lpidx = rpidx + compound[rpidx:].index(")")
                    subeq = compound[rpidx + 1:lpidx]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = (compound[:rpidx] + [substr] +
                                compound[lpidx + 1:])
                else:
                    subeq = compound[0:3]
                    substr = "(" + ''.join(subeq) + ")"
                    compound = [substr] + compound[3:]
                if substr in objs:
                    # Already computed for an earlier candidate: reuse.
                    pute = objs[substr]
                else:
                    p, op, e = subeq
                    p = objs[p][1]
                    e = objs[e][1]
                    op = op.strip()
                    pute = compute(p, op, e, target, problem)
                    objs[substr] = pute
                if pute == -1:
                    # -1 is treated as a fatal result from compute().
                    exit()
                score, _ = pute
                thisscore.append(score)
            if len(thisscore) == 0:
                scores.append(0)
            else:
                scores.append(sum(thisscore) / float(len(thisscore)))

        # (The former unused np.argmax(scores) call was removed: it raised
        # ValueError whenever no candidate equations were generated.)
        srt = sorted([(x, i) for i, x in enumerate(scores)], reverse=True)
        print('\n Top scoring 3 equations: ')
        for x, i in srt[:3]:
            print(x, ST.equations[i].toString())

        eqidxs = [
            pair[0] for pair in sorted(
                enumerate(scores), key=lambda pair: pair[1], reverse=True)
        ]
        seen = []
        tright = 0
        for i in eqidxs:
            # Stop after the first accepted guess.
            if len(seen) >= 1:
                break
            eq_text = ST.equations[i].toString()
            # Rewrite "lhs = rhs" as "lhs - (rhs)" for the solver.
            splitEquation = eq_text.split('=')
            expr = splitEquation[0] + '- (' + splitEquation[1] + ')'
            try:
                guess = solve(expr, 'x')[0]
            except Exception:
                # No usable solution; -1 is rejected by the check below.
                guess = -1

            # This is the non-negative constraint
            # wrapped in a "check for complex number" try statement :/
            try:
                if guess < 0:
                    continue
            except Exception:
                continue

            # this is a constraint against fractional answers when the
            # problem is integers.
            # NOTE(review): is_integer is read as an attribute, not called;
            # presumably guess is a solver object exposing it as a property,
            # confirm.
            if not guess.is_integer:
                if integerproblem:
                    continue

            if guess not in seen:
                seen.append(guess)
            else:
                continue
            answ = float(answs[k])
            ops = [ch for ch in eq_text if ch in ['+', '-', '*', '/']]
            if guess == answ:
                print("\nCORRECT")
                tright = 1
                ar[0] += ops.count('+')
                sr[0] += ops.count('-')
                mr[0] += ops.count('*')
                dr[0] += ops.count('/')
            else:
                print("\nINCORRECT")
            ar[1] += ops.count('+')
            sr[1] += ops.count('-')
            mr[1] += ops.count('*')
            dr[1] += ops.count('/')
            print("Guessed Equation : ", eq_text)

            print("Guess : ", guess, "\nTrue Answer :", answ, '\n\n')
        guesses += len(seen)
        if tright == 1:
            # NOTE(review): `multi` is not assigned anywhere in this
            # function and `multiops` is never incremented.  Unless `multi`
            # is defined at module level this line raises NameError;
            # confirm the intended definition.
            if multi:
                multiopsright += 1
            right += 1
        else:
            wrong.append(k)

        if VERBOSE:
            input()
    print(right, guesses)
    print(multiops, multiopsright)
    print(ar, sr, mr, dr)
コード例 #31
0
def make_eq(q,a,equations):
    """Generate global-model training rows and write them to a data file.

    For each word problem, candidate equations from ``get_k_eqs`` are
    filtered to those whose operands appear in numlist order (an equation
    with ``x`` alone on one side is also tried with its sides swapped).  At
    most one label-1 and one label-0 candidate per problem is processed.
    Each binary sub-expression is scored with ``compute`` and the two
    reduced sides are passed to ``training``.  Rows are written to
    ``data/<c>.global.data``, ``<c>`` being the last character of
    ``sys.argv[1]``, as ``<row[0]> 1:<row[1]> 2:<row[2]> ...``.

    Args:
        q: Sequence of word-problem strings.
        a: Not used by this function; kept so the signature is unchanged.
        equations: Sequence parallel to ``q``; item ``k`` is passed to
            ``get_k_eqs``.

    Returns:
        None.  The result is the file written at the end.
    """
    # Tokens of an equation string that are structure, not operands.
    non_operands = ('(', ')', '+', '-', '/', '*', '=')
    tdata = []
    wps = q

    for k in range(len(wps)):
        answers = get_k_eqs(equations[k], g=True)
        good = list(set([x for x in answers if x[0] == 1]))
        # Keep at most as many label-0 candidates as label-1 candidates.
        bad = list(set([x for x in answers if x[0] == 0]))[:len(good)]
        answers = good + bad
        if not answers:
            continue
        answers = list(set(answers))

        # First preprocessing, tokenize slightly: detach trailing ',', '.'
        # or '?' from a word.
        words = wps[k].strip().split(" ")
        for pos, word in enumerate(words):
            if word and word[-1] in (',', '.', '?'):
                words[pos] = word[:-1] + " " + word[-1]
        problem = " " + ' '.join(words) + " "
        print(problem)

        # make story
        story = nlp.parse(problem)
        sets = makesets.makesets(story['sentences'])

        # A problem without an unknown cannot be turned into training data.
        if not [entry for entry in sets if entry[1].num == 'x']:
            print("NO X WHY")
            continue

        numlist = [(cleannum(v.num), v) for _, v in sets]
        numlist = [pair for pair in numlist if pair[0] != '']
        objs = {num: (0, v) for num, v in numlist}
        # Operand strings in numlist order; loop-invariant, so built once.
        numlist_order = [pair[0] for pair in numlist]
        print(numlist)
        consts = [t for t in answers[0][1].split(" ") if t not in non_operands]
        print(consts)
        present = [t for t in consts if t in objs]
        if present != consts:
            print(present, consts)
            print("missing thing")
            continue

        print(answers)
        seen_good = False
        seen_bad = False

        for j, eq, cons in answers:
            eqspl = eq.split(" = ")
            consts = [t for t in eq.split(" ") if t not in non_operands]
            order = int(consts == numlist_order)
            if order == 0:
                # Retry with the sides swapped when one side is just 'x'.
                if eqspl[0].strip() == 'x' or eqspl[1].strip() == 'x':
                    eq = eqspl[1] + " = " + eqspl[0]
                    consts = [
                        t for t in eq.split(" ") if t not in non_operands
                    ]
                    order = int(consts == numlist_order)
                if order == 0:
                    continue
            # Use only the first usable candidate of each label.
            if j == 1:
                if seen_good:
                    continue
                seen_good = True
            if j == 0:
                if seen_bad:
                    continue
                seen_bad = True

            print(j, eq)
            l, r = [side.strip().split(' ') for side in eq.split('=')]
            target = ('x', objs['x'])

            # find innermost parens?
            sides = []
            thisscore = []
            for compound in (l, r):
                while len(compound) > 1:
                    if "(" in compound:
                        # Last "(" and first ")" after it: innermost group.
                        rpidx = (len(compound) - 1) - compound[::-1].index('(')
                        lpidx = rpidx + compound[rpidx:].index(")")
                        subeq = compound[rpidx + 1:lpidx]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = (compound[:rpidx] + [substr] +
                                    compound[lpidx + 1:])
                    else:
                        # No parentheses left: fold the leftmost triple.
                        subeq = compound[0:3]
                        substr = "(" + ''.join(subeq) + ")"
                        compound = [substr] + compound[3:]
                    p, op, e = subeq
                    left = objs[p]
                    right = objs[e]
                    op = op.strip()
                    pute = compute(left, op, right, target, problem, story,
                                   order)
                    # Register the reduced group so later steps can look it
                    # up by its string form.
                    objs[substr] = pute
                    if pute == -1:
                        # -1 is treated as a fatal result from compute().
                        exit()
                    score, _, _ = pute
                    thisscore.append(score)
                sides.append(objs[compound[0]])

            # Overall score is the product of all sub-expression scores.
            score = 1
            for s in thisscore:
                score *= s
            tdata.append(training(sides[0], sides[1], problem, story, target,
                                  j, order, score, cons, eq))

    # Context manager so the file is flushed and closed even on error; the
    # previous bare open() never closed the handle.
    with open("data/" + sys.argv[1][-1] + ".global.data", 'w') as f:
        for row in tdata:
            f.write(str(row[0]) + " ")
            for idx, value in enumerate(row[1:]):
                f.write(str(idx + 1) + ":" + str(value) + " ")
            f.write("\n")