Ejemplo n.º 1
0
 def average_quantile(s, p):
     """Return the average quantile of positive vs. negative targets for (s, p).

     A query built from source ``s`` and path ``p`` is handed to ``neg_gen``
     (asked for targets, ``'t'``) with ``return_positives=True``, which yields
     both a negative and a positive collection. Each collection is wrapped in
     its own ``PathQuery``, scored with ``scores``, and the two score sets are
     combined by ``util.average_quantile``.
     """
     # The target slot is '' here; presumably a placeholder because only the
     # source and path matter to the generator -- confirm against neg_gen.
     seed_query = PathQuery(s, p, '')
     negative_targets, positive_targets = neg_gen(seed_query,
                                                  't',
                                                  return_positives=True)

     positive_query = PathQuery(s, p, positive_targets)
     negative_query = PathQuery(s, p, negative_targets)

     positive_scores = scores(positive_query)
     negative_scores = scores(negative_query)
     return util.average_quantile(positive_scores, negative_scores)
Ejemplo n.º 2
0
def difference_evaluation(name):
    """Summarise, per relation, the relative change in ``aqs`` at each path step.

    Reads ``join(data_directory, name + '.tsv')``. Every line is split on tabs
    into: source, comma-separated relation path, target, and a comma-separated
    list of floats that is stored on the query as ``aqs`` (presumably the
    average quantile after each step of the path -- confirm with the writer
    of the file).

    For each query the sequence ``[1.0] + aqs`` is walked pairwise. The change
    from the previous value to the current one is normalised:

    * previous value == 1.0 -> delta is fixed at 1.0,
    * previous value == 0.0 -> delta is NaN and is discarded,
    * non-negative change   -> change / (1 - previous)  (portion recovered),
    * negative change       -> change / previous        (portion lost).

    Each delta is filed under the relation at the same position in the path.

    Args:
        name: Base name (without ``.tsv``) of the file inside
            ``data_directory``.

    Returns:
        A ``pandas.DataFrame`` indexed by relation with a single column
        ``'mean(aq_diff)'`` holding the mean delta for that relation.

    Raises:
        IndexError: If a line has fewer than four tab-separated fields, or a
            query has more ``aqs`` values than relations in its path.
        ValueError: If an ``aqs`` entry cannot be parsed as a float.
    """
    queries = []
    with open(join(data_directory, name + '.tsv'), 'r') as f:
        for line in util.verboserate(f):
            items = line.split('\t')
            s, r, t = items[0], tuple(items[1].split(',')), items[2]
            q = PathQuery(s, r, t)
            # Use a dedicated loop name: on Python 2 a list-comprehension
            # variable leaks into the enclosing scope and would overwrite
            # the source entity ``s``.
            q.aqs = [float(value) for value in items[3].split(',')]
            queries.append(q)

    aq_deltas = defaultdict(list)
    for q in queries:
        aqs = [1.0] + q.aqs
        for step, (prev_aq, aq) in enumerate(zip(aqs, aqs[1:])):
            # Index (rather than zip) the path so that a path shorter than
            # the value list still fails loudly instead of being truncated.
            r = q.r[step]

            if prev_aq == 1.0:
                delta = 1.0  # no ground to gain
            elif prev_aq == 0.0:
                delta = np.nan  # no ground to lose
            else:
                diff = aq - prev_aq
                if diff >= 0:
                    delta = diff / (1.0 - prev_aq)  # portion recovered
                else:
                    delta = diff / prev_aq  # portion lost

            if not np.isnan(delta):
                aq_deltas[r].append(delta)

    # ``items()`` exists on both Python 2 and 3 (``iteritems()`` does not).
    return pd.DataFrame({
        'mean(aq_diff)': {
            r: np.nanmean(deltas)
            for r, deltas in aq_deltas.items()
        }
    })
Ejemplo n.º 3
0
    def performance(query):
        """Score one query against generated negatives and format a TSV row.

        Negatives for the target slot are obtained from ``neg_gen``. When at
        least one negative exists, ``query.quantile`` is set to
        ``util.average_quantile`` of the positive and negative scores;
        otherwise it is set to NaN. ``query.num_candidates`` is set to the
        number of negatives plus one.

        Returns:
            A tab-joined string of: source, comma-joined path, target,
            quantile and candidate count.
        """
        source, path, target = query.s, query.r, query.t
        negatives = neg_gen(query, 't')
        positive_query = PathQuery(source, path, target)
        negative_query = PathQuery(source, path, negatives)

        negative_count = len(negatives)
        if negative_count:
            query.quantile = util.average_quantile(scores(positive_query),
                                                   scores(negative_query))
        else:
            # nothing to rank against, so the query is left unscored
            query.quantile = np.nan

        # the extra candidate is the positive target itself
        query.num_candidates = negative_count + 1

        fields = (
            query.s,
            ','.join(query.r),
            query.t,
            str(query.quantile),
            str(query.num_candidates),
        )
        return '\t'.join(fields)
Ejemplo n.º 4
0
    def predict(self, maximizer, ex):
        """Score the true target of ``ex`` together with generated negatives.

        The list returned by ``self.neg_generator(ex, 't')`` is modified in
        place: the true target ``ex.t`` is inserted at index 0. All candidates
        are then scored in a single ``PathQuery`` through
        ``maximizer.objective.predict`` and the result is flattened.

        Returns:
            A ``(samples, scores, ranks)`` tuple, where ``ranks`` comes from
            ``util.ranks(scores, ascending=False)``.
        """
        candidates = self.neg_generator(ex, 't')
        candidates.insert(0, ex.t)  # the positive always sits at the front

        objective = maximizer.objective
        flat_scores = objective.predict(
            maximizer.params, PathQuery(ex.s, ex.r, candidates)).ravel()
        assert len(flat_scores.shape) == 1

        candidate_ranks = util.ranks(flat_scores, ascending=False)
        return candidates, flat_scores, candidate_ranks
Ejemplo n.º 5
0
def segmented_evaluation(file_path, categorize=None):
    """Group scored queries and report quantile statistics per group.

    Each line of ``file_path`` is split on tabs into: source, comma-separated
    relation path, target, quantile (float, may be NaN) and candidate count
    (int). The resulting queries are grouped with ``util.group`` using
    ``categorize``.

    Args:
        file_path: Path of the tab-separated results file to read.
        categorize: Callable mapping a query to its group key. Defaults to a
            function that returns the relation of a single-step,
            non-inverted path and ``False`` for every other query.

    Returns:
        A ``pandas.DataFrame`` with one row per group key and the columns
        ``score`` (NaN-ignoring mean quantile), ``score2`` (the inverse
        sigmoid, i.e. logit, of ``score``), ``total_eval`` (queries in the
        group) and ``nontrivial_eval`` (queries whose quantile is not NaN).
        The group keyed ``False`` is dropped.

    Raises:
        IndexError: If a line has fewer than five tab-separated fields.
        ValueError: If the quantile or candidate count cannot be parsed.
    """
    queries = []
    with open(file_path, 'r') as f:
        for line in util.verboserate(f):
            items = line.split('\t')
            s, r, t = items[0], tuple(items[1].split(',')), items[2]
            q = PathQuery(s, r, t)
            q.quantile = float(items[3])
            q.num_candidates = int(items[4])
            queries.append(q)

    def single_relation(query):
        # Key for the default grouping: the relation itself for one-step,
        # non-inverted paths; False (discarded later) for everything else.
        if len(query.r) != 1:
            return False
        r = query.r[-1]
        if inverted(r):
            return False
        return r

    def inv_sigmoid(y):
        # Defined once here instead of being rebuilt on every loop pass.
        return -np.log(1.0 / y - 1)

    # group queries
    if categorize is None:
        categorize = single_relation

    groups = util.group(queries, categorize)

    # Parenthesised single-argument form prints the same text on Python 2
    # and is valid syntax on Python 3.
    print('computing grouped stats')
    stats = {}
    # ``items()`` works on Python 2 and 3; ``iter`` keeps handing the
    # progress wrapper an iterator, as ``iteritems()`` did.
    for key, members in util.verboserate(iter(groups.items())):
        quantiles = [q.quantile for q in members]
        score = np.nanmean(quantiles)

        stats[key] = {
            'score': score,
            'score2': inv_sigmoid(score),
            'total_eval': len(quantiles),
            'nontrivial_eval': np.count_nonzero(~np.isnan(quantiles))
        }

    # queries the categorizer rejected were collected under the key False
    stats.pop(False, None)
    return pd.DataFrame(stats).transpose()
Ejemplo n.º 6
0
def load_socher_test(test_set_path):
    """Load labelled path queries from a whitespace-separated test file.

    The file is opened at ``join(data_directory, test_set_path)``. Each line
    is split on whitespace into: source, comma-separated relation path,
    target and label. A label of ``'1'`` sets ``label`` to True on the
    resulting ``PathQuery`` and ``'-1'`` sets it to False.

    Returns:
        A list of ``PathQuery`` objects, each carrying a boolean ``label``.

    Raises:
        ValueError: If a label is neither ``'1'`` nor ``'-1'``; the offending
            label is the exception argument.
        IndexError: If a line has fewer than four fields.
    """
    label_values = {'1': True, '-1': False}

    loaded = []
    with open(join(data_directory, test_set_path), 'r') as f:
        for line in util.verboserate(f):
            fields = line.split()
            source = fields[0]
            path = tuple(fields[1].split(','))
            target = fields[2]
            label = fields[3]

            example = PathQuery(source, path, target)
            if label not in label_values:
                raise ValueError(label)
            example.label = label_values[label]
            loaded.append(example)
    return loaded