Code example #1 (score: 0)
File: evaluate.py — Project: jfarrugia-uom/hyperstar
    def __call__(self, model_name):
        """Evaluate *model_name* on the test subsumption pairs.

        Loads the pickled k-means model, partitions the test offset vectors
        (hypernym minus hyponym embeddings) into clusters, reads the model's
        per-cluster hypernym estimates from '<model_name>.test.npz', and
        looks up the nearest-neighbour words of each estimate.

        Returns a dict mapping each test hyponym to its top-15 predicted
        hypernym words, or an empty dict when the model's per-cluster output
        file is missing or does not cover every cluster.
        """
        predictions = {}

        print('Evaluating "%s" on "%s".' % (model_name, self.args['test']),
              flush=True)

        # Load the k-means model. A context manager closes the file handle;
        # the original pickle.load(open(...)) leaked it.
        with open(os.path.join(".", 'kmeans.pickle'), 'rb') as pickled:
            kmeans = pickle.load(pickled)
        print('The number of clusters is %d.' % (kmeans.n_clusters),
              flush=True)

        # Partition the test data according to the k-means model: cluster
        # assignment is based on the hypernym-minus-hyponym offset vectors.
        clusters_test = kmeans.predict(self.Y_all_test - self.X_all_test)

        try:
            # The interpolation belongs inside join's argument; the original
            # applied '% model_name' to join's *result*, which only worked
            # because the literal happened to contain the '%s' placeholder.
            with np.load(os.path.join(".", '%s.test.npz' % model_name)) as npz:
                Y_hat_clusters = {
                    int(cluster): npz[cluster]
                    for cluster in npz.files
                }
        except FileNotFoundError:
            Y_hat_clusters = {}

        # Refuse to evaluate a model whose output does not cover every cluster.
        if kmeans.n_clusters != len(Y_hat_clusters):
            print('Missing the output for the model "%s"!' % model_name,
                  file=sys.stderr,
                  flush=True)
            return predictions

        # Gather the estimated hypernym vector for each test term.
        Y_all_hat = extract(clusters_test, Y_hat_clusters)

        # Ensure we have exactly one estimate per test subsumption pair.
        assert len(self.subsumptions_test) == Y_all_hat.shape[0]

        # Unit-normalize the estimates so a dot product against the
        # unit-norm word vectors (w2v.syn0norm) is a cosine similarity.
        Y_all_hat_norm = Y_all_hat / np.linalg.norm(Y_all_hat,
                                                    axis=1)[:, np.newaxis]

        # Find the nearest-neighbour words of every estimated hypernym vector.
        print('nn_vec...')
        similar_indices = nn_vec(Y_all_hat_norm,
                                 self.w2v.syn0norm,
                                 topn=15,
                                 sort=True,
                                 return_sims=False,
                                 nthreads=self.args['threads'],
                                 verbose=False)
        print('nn_vec results convert...')  # fixed typo: was 'covert'
        similar_words = [[self.w2v.index2word[ind] for ind in row]
                         for row in similar_indices]
        print('done')

        # Map each test hyponym to its predicted hypernym words.
        for i, (hyponym, hypernym) in enumerate(self.subsumptions_test):
            predictions[hyponym] = similar_words[i]

        return predictions
Code example #2 (score: 0)
        if kmeans.n_clusters != len(Y_hat_clusters):
            print('Missing the output for the model "%s"!' % model)
            continue

        Y_all_hat = extract(clusters_test, Y_hat_clusters)

        assert len(subsumptions_test) == Y_all_hat.shape[0]

        measures = [{} for _ in range(10)]

        if not args['non_optimized']:
            # normalize Y_all_hat to make dot product equeal to cosine and monotonically decreasing function of euclidean distance
            Y_all_hat_norm = Y_all_hat / np.linalg.norm(Y_all_hat,axis=1)[:,np.newaxis]
            print('nn_vec...')
            similar_indices = nn_vec(Y_all_hat_norm, w2v.syn0norm, topn=10, sort=True, return_sims=False, nthreads=args['threads'], verbose=False)
            print('nn_vec results covert...')
            similar_words = [[w2v.index2word[ind] for ind in row] for row in similar_indices]
            print('done')
        file_ptr_ms = open(str(model)+"_test_candidates1",'w')
        file_ptr_hypo = open("test_hypo1",'w')
        file_ptr_gold = open("test_gold1",'w')
        prev_hypo = ''
        gold_list = ''
        out_ms = ''
	count = 0
        for i, (hyponym, hypernym) in enumerate(subsumptions_test):
            if args['non_optimized']:
                Y_hat  = Y_all_hat[i].reshape(X_all_test.shape[1],)
                actual = [w for w,_ in w2v.most_similar(positive=[Y_hat], topn=10)]
            else: