Example #1
def objective_mean(dset):
    sample = util.sample_if_large(dset, self.dset_samples)
    vals = []
    for ex in util.verboserate(sample):
        vals.append(experiment.model.objective(ex.sentences, ex.mask,
                                               ex.question, ex.answer[0],
                                               ex.hints))
    return np.mean(vals)
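
Every snippet on this page goes through util.sample_if_large, whose source is not listed here. A minimal sketch of the behavior the call sites imply (the name, signature, and replace default match the calls below, but the body is an assumption, not the library's actual code):

import random

def sample_if_large(collection, max_size, replace=True):
    # Assumed behavior: return the data untouched when it fits within
    # max_size (so max_size=float('inf') is a no-op), otherwise draw a
    # random sample of max_size elements.
    if len(collection) <= max_size:
        return list(collection)
    if replace:
        return [random.choice(collection) for _ in range(max_size)]
    return random.sample(collection, max_size)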
Example #2
def accuracy_mean(dset):
    sample = util.sample_if_large(dset, self.dset_samples)

    vals = []
    for ex in util.verboserate(sample):
        correct = ex.answer == experiment.model.predict(ex.sentences, ex.mask,
                                                        ex.question)
        vals.append(correct)
    return np.mean(vals)
Example #3
def summarize_neighborhood(graph,
                           seed=None,
                           max_depth=2,
                           nbr_samples=20,
                           save_path=None):
    if seed is None:
        seed = random.choice(list(graph.neighbors.keys()))
        print('seed:', seed)

    triples = set()
    explored = {seed}  # track enqueued entities so none is queued twice
    queue = deque()
    queue.append((seed, 0))
    while queue:
        entity, depth = queue.popleft()

        if depth >= max_depth:
            continue

        # loop through each available relation
        for r in graph.neighbors[entity]:
            # sample neighbors
            nbrs = graph.neighbors[entity][r]
            sampled_nbrs = util.sample_if_large(nbrs,
                                                nbr_samples,
                                                replace=False)
            num_missed = len(nbrs) - len(sampled_nbrs)

            edge_crossed = lambda target: ((entity, r, target)
                                           if not inverted(r)
                                           else (target, invert(r), entity))

            # document edges crossed, and add nbrs to queue
            for nbr in sampled_nbrs:
                triples.add(edge_crossed(nbr))
                if nbr not in explored:
                    explored.add(nbr)
                    queue.append((nbr, depth + 1))

            # add "summary entity" for all entities we missed
            if num_missed > 0:
                triples.add(
                    edge_crossed('{}_{}_{}'.format(entity, r, num_missed)))

    if save_path is not None:
        with open(save_path, 'w') as f:
            for tr in triples:
                f.write('\t'.join(tr) + '\n')

    return list(triples)
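
The snippet above calls inverted and invert without showing them. A minimal sketch of plausible helpers, assuming inverse relations are tagged with a sentinel prefix (the prefix string and both function bodies are assumptions, not the repository's confirmed code):

# Hypothetical helpers; the '**' prefix convention is an assumption.
INVERSE_PREFIX = '**'

def inverted(r):
    # True if the relation denotes a backwards-traversed edge.
    return r.startswith(INVERSE_PREFIX)

def invert(r):
    # Flip direction: strip the prefix if present, otherwise prepend it.
    if inverted(r):
        return r[len(INVERSE_PREFIX):]
    return INVERSE_PREFIX + r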
Example #4
def observe(self, maximizer, thresholds=None):
    if (maximizer.steps + 1) % self.report_wait != 0:
        return None

    samples = util.sample_if_large(self.examples,
                                   self.eval_samples,
                                   replace=True)

    # score
    samples = copy.deepcopy(samples)
    for ex in samples:
        try:
            ex.score = maximizer.objective.predict(maximizer.params,
                                                   ex).ravel()[0]
        except KeyError:
            print('out of vocab')
            ex.score = float('inf')

    if thresholds is None:
        thresholds = compute_best_thresholds(samples)
    acc = accuracy(thresholds, samples)

    return {('accuracy', 'test'): acc}
Example #5
def mean_rank(self, maximizer, dset):
    sample = util.sample_if_large(dset, self.eval_samples)
    ranks = [self.rank(maximizer, ex) for ex in util.verboserate(sample)]
    return np.nanmean(ranks)
Example #6
def final_evaluation(dataset_path,
                     model_name,
                     params_path,
                     eval_type,
                     eval_samples=float('inf'),
                     max_negative_samples=float('inf'),
                     type_matching_negs=True):
    dset = parse_dataset(dataset_path)
    model = CompositionalModel(None, path_model=model_name, objective='margin')
    params = load_params(params_path, model_name)
    neg_gen = NegativeGenerator(dset.full_graph,
                                max_negative_samples,
                                type_matching_negs=type_matching_negs)

    queries = util.sample_if_large(dset.test, eval_samples, replace=False)

    # Define different evaluation functions
    # ----- ----- ----- ----- -----
    scores = lambda query: model.predict(params, query).ravel()

    def performance(query):
        s, r, t = query.s, query.r, query.t
        negatives = neg_gen(query, 't')
        pos_query = PathQuery(s, r, t)
        neg_query = PathQuery(s, r, negatives)

        # don't score queries with no negatives
        if len(negatives) == 0:
            query.quantile = np.nan
        else:
            query.quantile = util.average_quantile(scores(pos_query),
                                                   scores(neg_query))

        query.num_candidates = len(negatives) + 1

        attributes = query.s, ','.join(query.r), query.t, str(
            query.quantile), str(query.num_candidates)
        return '\t'.join(attributes)

    def report(queries):
        # filter out NaNs
        queries = [q for q in queries if not np.isnan(q.quantile)]
        util.metadata('mean_quantile', np.mean([q.quantile for q in queries]))
        util.metadata(
            'h10',
            np.mean([
                1.0
                if util.rank_from_quantile(q.quantile, q.num_candidates) <= 10
                else 0.0 for q in queries
            ]))

    def average_quantile(s, p):
        negatives, positives = neg_gen(PathQuery(s, p, ''),
                                       't',
                                       return_positives=True)
        pos_query = PathQuery(s, p, positives)
        neg_query = PathQuery(s, p, negatives)
        return util.average_quantile(scores(pos_query), scores(neg_query))

    def intermediate_aqs(query):
        s, path = query.s, query.r
        aqs = []
        for length in 1 + np.arange(len(path)):
            p = path[:length]
            aq = average_quantile(s, p)
            aqs.append(aq)

        attributes = query.s, ','.join(query.r), query.t, ','.join(
            str(aq) for aq in aqs)
        return '\t'.join(attributes)

    # ----- ----- ----- ----- -----

    if eval_type == 'mean_quantile':
        eval_fxn = performance
        eval_report = report
    elif eval_type == 'intermediate_aqs':
        eval_fxn = intermediate_aqs
        eval_report = lambda qs: None
    else:
        raise ValueError(eval_type)

    with open('results.tsv', 'w') as f:

        def progress(steps, elapsed):
            print('{} of {} processed ({} s)'.format(steps, len(queries),
                                                     elapsed))
            util.metadata('steps', steps)
            util.metadata('gb_used', util.gb_used())
            sys.stdout.flush()
            f.flush()

        for query in util.verboserate(queries, report=progress):
            s = eval_fxn(query)
            f.write(s)
            f.write('\n')

    eval_report(queries)

    with open('queries.cpkl', 'wb') as f:
        pickle.dump(queries, f)
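
Example #6 also depends on util.average_quantile and util.rank_from_quantile, which are not reproduced here. A sketch of the semantics the call sites suggest (both bodies are assumptions): a quantile of 1.0 means the positive outscored every negative, and the rank recovers the positive's position among num_candidates candidates.

import numpy as np

def average_quantile(pos_scores, neg_scores):
    # Assumed: for each positive score, the fraction of negatives it
    # beats (ties counted as half), averaged over all positives.
    neg = np.asarray(neg_scores)
    quantiles = [(np.sum(neg < p) + 0.5 * np.sum(neg == p)) / len(neg)
                 for p in pos_scores]
    return np.mean(quantiles)

def rank_from_quantile(quantile, num_candidates):
    # Assumed: quantile 1.0 -> rank 1 (best), quantile 0.0 -> last.
    return 1 + (1.0 - quantile) * (num_candidates - 1)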
Example #7
def avg_steps(self, experiment, dset):
    sample = util.sample_if_large(dset, self.eval_samples)
    steps = [self.get_steps(experiment, goal) for goal in sample]
    return np.mean(steps)
Example #8
def objective_mean(dset):
    sample = util.sample_if_large(dset, self.dset_samples)
    vals = [maximizer.objective.value(maximizer.params, ex) for ex in util.verboserate(sample)]
    return np.mean(vals)