Beispiel #1
0
import parse_semeval
import cPickle
import random
class Paraphrase:
    """A paraphrase identified by its text, with an associated frequency.

    Two paraphrases are considered equal when their ``name`` attributes are
    equal; ``freq`` plays no part in comparison or hashing.

    Attributes:
        name: the paraphrase text used for equality and hashing.
        freq: frequency attached to the paraphrase (defaults to 0.0).
    """

    def __init__(self, name, freq=0.0):
        self.name = name
        self.freq = freq

    def __eq__(self, other):
        # No isinstance check: any object exposing ``name`` is comparable,
        # exactly as before. Objects without ``name`` now compare unequal
        # instead of raising AttributeError.
        try:
            other_name = other.name
        except AttributeError:
            return NotImplemented
        return self.name == other_name

    def __ne__(self, other):
        # Python 2 does not derive ``!=`` from ``__eq__``; without this,
        # ``a != b`` would fall back to an identity comparison.
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            return outcome
        return not outcome

    def __hash__(self):
        # Keep hashing consistent with equality so equal paraphrases collapse
        # in sets and dict keys. Do not mutate ``name`` while an instance is
        # stored in a hashed container.
        return hash(self.name)

print "unpickling: priors"
priors=cPickle.load(open('/home/paul/thesis/data/pickles/trainingPriors'))
print "unpickling prob table"
probs=cPickle.load(open('/home/paul/thesis/data/pickles/trainingProbs'))
f=open("/home/paul/thesis/data/SemEval2Task9/SemEval2_task9_testing_keys/FINAL_GOLD.txt")
all_pairs=parse_semeval.parse_file(f)
total=0.0
for pair in all_pairs:
    paras=[]
    for p in pair.paraphrases:
        if p.freq>2:paras.append(p) 
    number=len(paras)
    subs=random.sample(paras,3)
    results=[]
    print pair.n1+" "+pair.n2
    for p in probs.keys():
        x=Paraphrase(p.strip())
        x.score=0.0
        results.append(x)
    for p in results:
        for s in subs:
Beispiel #2
0
        score=0.0
        basescore=0.0
        for b in base[0:m]:
            if b in gold_paras:basescore+=1.0    
        for r in results[0:m]:
            if r in gold_paras:score+=1.0
        total+=(score/float(m))
        basetotal+=(basescore/float(m))
    acc=total/len(testing)
    print "predictions:"
    print total/len(testing)
    print
    baseacc=basetotal/len(testing)
    print "baseline:"
    print basetotal/len(testing)
    
    print errcount
    print nonerrcount
    results=[acc,baseacc]
    return results



 
        
if __name__=="__main__":
    # Parse the combined data file and run get_results on entries 200-499;
    # the same slice is passed as both the first and the second argument.
    n=5
    # Close the input once parsed; the result is sliced below, so it is
    # already a materialised sequence by the time the file is closed.
    with open("/home/paul/mayThesis/semEvalTask9/combined.txt") as data_file:
        all_pairs=parse_semeval.parse_file(data_file, n)
    get_results(all_pairs[200:500],all_pairs[200:500], 5)
    
Beispiel #3
0
                probs[a][b] = (cooc[a][b]) / ((priors[b]) * (priors[a] ** 0))
                # print probs[a][b]
            else:
                probs[a][b] = 0.0
    return probs


if __name__ == "__main__":
    # Build priors and the probability table from the training pairs, score
    # every paraphrase of each test pair, and write the ranked paraphrases to
    # out.txt, one per line.
    n = 2
    # NOTE(review): out_file is opened first (truncating any previous output)
    # and is not closed within this block -- confirm it is closed further down
    # or rely on interpreter exit.
    out_file = open(
        "/home/paul/mayThesis/semEvalTask9/SemEval2_task9_all_data_final/SemEval2_task9_scorer/out.txt", "w"
    )
    # The test file used to be opened twice, leaking the first handle. Each
    # input is now opened once and closed as soon as it has been parsed.
    with open("/home/paul/mayThesis/semEvalTask9/testing.txt") as test_file:
        test_pairs = parse_semeval.parse_file(test_file, n)
    with open("/home/paul/mayThesis/semEvalTask9/combined.txt") as train_file:
        all_pairs = parse_semeval.parse_file(train_file, n)
    priors = make_priors(all_pairs)
    probs = make_prob_table(all_pairs, priors)
    for pair in test_pairs:
        # Shallow copy, so the inner loop iterates over a separate list object.
        candidates = copy.copy(pair.paraphrases)
        for para in pair.paraphrases:
            # Add probs[para.name][c.name] for every candidate of this pair
            # that does not compare equal to the paraphrase being scored.
            for c in candidates:
                if para == c:
                    continue
                para.score += probs[para.name][c.name]
        # Highest score first.
        pair.paraphrases = sorted(pair.paraphrases, key=lambda x: x.score, reverse=True)
        # Output line format: "<rank> <n1> <n2> <paraphrase> <score>", rank starting at 1.
        for i, p in enumerate(pair.paraphrases, 1):
            out_file.write(" ".join([str(i), pair.n1, pair.n2, p.name, str(p.score)]) + "\n")