Ejemplo n.º 1
0
class CoraCitations(Dataset):
    """Dataset read through ``fromSGML`` and split into train and dev parts.

    The records are shuffled with ``np.random.shuffle``; the first fifth
    (integer division) becomes the dev split and the remainder the train
    split.  The test split is always an empty list.
    """

    def __init__(self, filename):
        """Load ``filename``, shuffle its records and build both splits.

        Args:
            filename: forwarded to ``fromSGML``, which is asked to group
                lines on the ``<NEW.*?>`` pattern with ``bioencoding=False``.
        """
        # Label alphabet; evaluate() uses it to decode the gold tags.
        self.Y = Alphabet()

        records = list(fromSGML(filename,
                                linegrouper="<NEW.*?>",
                                bioencoding=False))
        # Shuffles in place, so the split below differs between runs unless
        # the NumPy random state is seeded by the caller.
        np.random.shuffle(records)

        n_dev = len(records) // 5
        super(CoraCitations, self).__init__(train=records[n_dev:],
                                            dev=records[:n_dev],
                                            test=[])

        # Replace the raw splits with whatever make_instances() builds.
        self.train = self.make_instances('train', Instance)
        self.dev = self.make_instances('dev', Instance)

    def evaluate(self, predict, data, name, verbosity=1):
        """Score ``predict`` on ``data`` with span-level F1.

        Args:
            predict: callable applied to every item of ``data``; its result
                is passed to ``extract_contiguous``.
            data: items exposing a ``tags`` attribute that ``self.Y`` can
                decode with ``lookup_many``.
            name: text used in the progress message and the printed header.
            verbosity: a falsy value silences the printed header; the flag
                ``verbosity >= 1`` is forwarded as ``verbose`` to
                ``F1.scores``.

        Returns:
            ``None`` when ``data`` is falsy, otherwise the result of
            ``F1.scores``.
        """
        if not data:
            return None

        if verbosity:
            print()
            print('Phrase-based F1:', name)

        scorer = F1()
        progress = iterview(data, msg='Eval %s' % name)
        for item_idx, example in enumerate(progress):
            predicted_spans = extract_contiguous(predict(example))
            gold_spans = extract_contiguous(self.Y.lookup_many(example.tags))
            # (item index, begin, end) uniquely identifies a span; the label
            # is the class the span is counted under.
            for span_label, start, stop in gold_spans:
                scorer.add_relevant(span_label, (item_idx, start, stop))
            for span_label, start, stop in predicted_spans:
                scorer.add_retrieved(span_label, (item_idx, start, stop))

        if verbosity:
            print()
        return scorer.scores(verbose=verbosity >= 1)
Ejemplo n.º 2
0
    1: 'hypernym',
    2: 'hyponym',
}

# Build the alphabet from the second whitespace-separated field of every line
# of the map file, skipping the first three lines (indices 0-2).
# The file is opened in a ``with`` block so the handle is closed
# deterministically; ``open`` replaces the Python 2-only ``file`` builtin.
A = Alphabet()
with open('res/bowman_wordnet_longer_shuffled_synset_relations.map') as map_file:
    A.map([line.strip().split()[1]
           for line_no, line in enumerate(map_file)
           if line_no > 2])

tst = BWD_dataset('test').data
trn = BWD_dataset('train').data
trn_x = trn[0]
trn_y = trn[1]
# Every value that occurs in either of the first two training arrays.
seen = set(trn_x.flatten()) | set(trn_y.flatten())

# The test data unpacks into three arrays; the third one is not needed for
# the decoded views built here.
X, Y, _ = tst

# Decoded (via the alphabet) versions of the first two test arrays.
X = list(A.lookup_many(X.flatten()))
Y = list(A.lookup_many(Y.flatten()))
# Disabled; note that it references `L`, which is not defined in the visible code.
#D = np.array([X,Y,L.flatten()]).T

model_file = 'res/experiments/BWD-projection-Softmax_best.pkl'
#model_file = '/home/timv/Downloads/BWD-projection-identity_sub_glue-Softmax.pkl'

# SECURITY: unpickling executes arbitrary code, so only load model files that
# come from a trusted source.
with open(model_file, 'rb') as model_fh:
    pickled_model = cPickle.load(model_fh)
model_func = make_model_func(pickled_model)

(x_left, x_right, y_true) = tst

y_true = y_true.flatten()
y_dist = model_func((x_left, x_right))
# Predicted label = index of the largest entry along axis 1
# (presumably one row per example, one column per class -- confirm).
y_hat = y_dist.argmax(axis=1)

# Fraction of positions where prediction and gold label agree.  The
# single-argument parenthesised form prints the same text on Python 2 and 3.
print('before unseen filter acc: %g' % (y_hat == y_true).mean())
Ejemplo n.º 3
0
# Build the alphabet from the second whitespace-separated field of every line
# of the map file, skipping the first three lines (indices 0-2).
# The file is opened in a ``with`` block so the handle is closed
# deterministically; ``open`` replaces the Python 2-only ``file`` builtin.
A = Alphabet()
with open('res/bowman_wordnet_longer_shuffled_synset_relations.map') as map_file:
    A.map([
        line.strip().split()[1] for line_no, line in enumerate(map_file)
        if line_no > 2
    ])

tst = BWD_dataset('test').data
trn = BWD_dataset('train').data
trn_x = trn[0]
trn_y = trn[1]
# Every value that occurs in either of the first two training arrays.
seen = set(trn_x.flatten()) | set(trn_y.flatten())

# The test data unpacks into three arrays; the third one is not needed for
# the decoded views built here.
X, Y, _ = tst

# Decoded (via the alphabet) versions of the first two test arrays.
X = list(A.lookup_many(X.flatten()))
Y = list(A.lookup_many(Y.flatten()))
# Disabled; note that it references `L`, which is not defined in the visible code.
#D = np.array([X,Y,L.flatten()]).T

model_file = 'res/experiments/BWD-projection-Softmax_best.pkl'
#model_file = '/home/timv/Downloads/BWD-projection-identity_sub_glue-Softmax.pkl'

# SECURITY: unpickling executes arbitrary code, so only load model files that
# come from a trusted source.
with open(model_file, 'rb') as model_fh:
    pickled_model = cPickle.load(model_fh)
model_func = make_model_func(pickled_model)

(x_left, x_right, y_true) = tst

y_true = y_true.flatten()
y_dist = model_func((x_left, x_right))
# Predicted label = index of the largest entry along axis 1
# (presumably one row per example, one column per class -- confirm).
y_hat = y_dist.argmax(axis=1)

# Fraction of positions where prediction and gold label agree.  The
# single-argument parenthesised form prints the same text on Python 2 and 3.
print('before unseen filter acc: %g' % (y_hat == y_true).mean())

# Counter initialised here; its use lies beyond the visible part of the script.
unseen = 0