class CoraCitations(Dataset):
    """Dataset built from an SGML file, split at random into train and dev.

    The records are read with ``fromSGML`` (grouped on ``<NEW.*?>``, BIO
    encoding disabled), shuffled in place with ``np.random.shuffle``, and
    then partitioned: the first fifth becomes ``dev`` and the remainder
    becomes ``train``.  ``test`` is always empty.
    """

    def __init__(self, filename):
        """Load ``filename``, shuffle its records and build the instances.

        Args:
            filename: path handed straight to ``fromSGML``.
        """
        # The label alphabet is created before the base-class initialiser
        # runs, exactly as in the original statement order.
        self.Y = Alphabet()

        records = list(
            fromSGML(filename, linegrouper="<NEW.*?>", bioencoding=False))
        np.random.shuffle(records)

        # One fifth (integer division) of the shuffled records is held out.
        n_dev = len(records) // 5
        super(CoraCitations, self).__init__(train=records[n_dev:],
                                            dev=records[:n_dev],
                                            test=[])

        self.train = self.make_instances('train', Instance)
        self.dev = self.make_instances('dev', Instance)

    def evaluate(self, predict, data, name, verbosity=1):
        """Compute span-level F1 of ``predict`` over ``data``.

        Args:
            predict: callable applied to each instance; its result is passed
                to ``extract_contiguous``.
            data: instances to score.  If falsy, nothing is evaluated.
            name: label used in the progress message and the printed header.
            verbosity: truthy values enable the printed header/footer;
                ``verbosity >= 1`` is forwarded to ``F1.scores`` as
                ``verbose``.

        Returns:
            Whatever ``F1.scores`` returns, or ``None`` when ``data`` is
            empty.
        """
        if not data:
            return None

        if verbosity:
            print()
            print('Phrase-based F1:', name)

        scorer = F1()
        progress = iterview(data, msg='Eval %s' % name)
        for idx, instance in enumerate(progress):
            predicted_spans = extract_contiguous(predict(instance))
            gold_spans = extract_contiguous(
                self.Y.lookup_many(instance.tags))

            # The instance index is part of the key so that identical
            # (begin, end) offsets in different instances stay distinct.
            for label, begin, end in gold_spans:
                scorer.add_relevant(label, (idx, begin, end))
            for label, begin, end in predicted_spans:
                scorer.add_retrieved(label, (idx, begin, end))

        if verbosity:
            print()

        return scorer.scores(verbose=verbosity >= 1)
1: 'hypernym', 2: 'hyponym', } A = Alphabet() A.map([x.strip().split()[1] for i, x in enumerate(file('res/bowman_wordnet_longer_shuffled_synset_relations.map')) if i > 2]) tst = BWD_dataset('test').data trn = BWD_dataset('train').data trn_x = trn[0] trn_y = trn[1] seen = set(trn_x.flatten()) | set(trn_y.flatten()) X,Y,_ = tst X = list(A.lookup_many(X.flatten())) Y = list(A.lookup_many(Y.flatten())) #D = np.array([X,Y,L.flatten()]).T model_file = 'res/experiments/BWD-projection-Softmax_best.pkl' #model_file = '/home/timv/Downloads/BWD-projection-identity_sub_glue-Softmax.pkl' model_func = make_model_func(cPickle.load(open(model_file, 'rb'))) (x_left, x_right, y_true) = tst y_true = y_true.flatten() y_dist = model_func((x_left, x_right)) y_hat = y_dist.argmax(axis=1) print 'before unseen filter acc: %g' % (y_hat == y_true).mean()
# Feed the alphabet with the second whitespace-separated field of every line
# of the map file, skipping the first three lines (i = 0, 1, 2).  The file is
# opened in a `with` block so the handle is closed once it has been read.
A = Alphabet()
with open('res/bowman_wordnet_longer_shuffled_synset_relations.map') as map_file:
    A.map([x.strip().split()[1] for i, x in enumerate(map_file) if i > 2])

tst = BWD_dataset('test').data
trn = BWD_dataset('train').data

# Every value that occurs in either of the first two training arrays.
# NOTE(review): presumably these are the left/right item ids of each training
# pair, mirroring the (x_left, x_right, y_true) layout of `tst` -- confirm.
trn_x = trn[0]
trn_y = trn[1]
seen = set(trn_x.flatten()) | set(trn_y.flatten())

# Test-set items translated through the alphabet (third array ignored here).
X, Y, _ = tst
X = list(A.lookup_many(X.flatten()))
Y = list(A.lookup_many(Y.flatten()))

#D = np.array([X,Y,L.flatten()]).T

model_file = 'res/experiments/BWD-projection-Softmax_best.pkl'
#model_file = '/home/timv/Downloads/BWD-projection-identity_sub_glue-Softmax.pkl'

# NOTE: unpickling can execute arbitrary code; only load model files that
# come from a trusted source.
with open(model_file, 'rb') as model_fh:
    model_func = make_model_func(cPickle.load(model_fh))

(x_left, x_right, y_true) = tst
y_true = y_true.flatten()
y_dist = model_func((x_left, x_right))
# Index of the highest-scoring column in each row of the model output.
y_hat = y_dist.argmax(axis=1)

# Single parenthesised argument: prints identically under Python 2's print
# statement and is also valid as a function call.
print('before unseen filter acc: %g' % (y_hat == y_true).mean())

# Counter initialised to zero; the code that updates it is outside this chunk.
unseen = 0