def objective_mean(dset):
    sample = util.sample_if_large(dset, self.dset_samples)
    vals = []
    for ex in util.verboserate(sample):
        vals.append(experiment.model.objective(ex.sentences, ex.mask, ex.question,
                                               ex.answer[0], ex.hints))
    return np.mean(vals)

def accuracy_mean(dset):
    sample = util.sample_if_large(dset, self.dset_samples)
    vals = []
    for ex in util.verboserate(sample):
        correct = ex.answer == experiment.model.predict(ex.sentences, ex.mask, ex.question)
        vals.append(correct)
    return np.mean(vals)

def summarize_neighborhood(graph, seed=None, max_depth=2, nbr_samples=20, save_path=None):
    if seed is None:
        seed = random.choice(graph.neighbors.keys())
    print 'seed:', seed

    triples = set()
    explored = set()
    queue = deque()
    queue.append((seed, 0))
    explored.add(seed)

    # breadth-first expansion around the seed, up to max_depth hops
    while len(queue) != 0:
        entity, depth = queue.popleft()
        if depth >= max_depth:
            continue

        # loop through each available relation
        for r in graph.neighbors[entity]:
            # sample neighbors
            nbrs = graph.neighbors[entity][r]
            sampled_nbrs = util.sample_if_large(nbrs, nbr_samples, replace=False)
            num_missed = len(nbrs) - len(sampled_nbrs)
            edge_crossed = lambda target: (entity, r, target) if not inverted(r) else (target, invert(r), entity)

            # document edges crossed, and add unexplored nbrs to queue
            for nbr in sampled_nbrs:
                triples.add(edge_crossed(nbr))
                if nbr not in explored:
                    explored.add(nbr)
                    queue.append((nbr, depth + 1))

            # add "summary entity" for all entities we missed
            if num_missed > 0:
                triples.add(edge_crossed('{}_{}_{}'.format(entity, r, num_missed)))

    if save_path is not None:
        with open(save_path, 'w') as f:
            for tr in triples:
                f.write('\t'.join(tr) + '\n')

    return list(triples)

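# Usage sketch (hypothetical, not from the original code): it assumes
# graph.neighbors maps entity -> relation -> list of neighbor entities, as the
# lookups in summarize_neighborhood() imply, that the module's inverted() and
# invert() helpers are importable, and that util.sample_if_large returns the
# full list unchanged when it is smaller than nbr_samples.
class _ToyGraph(object):
    def __init__(self, neighbors):
        self.neighbors = neighbors

_toy = _ToyGraph({
    'obama': {'profession': ['politician'], 'place_of_birth': ['honolulu']},
    'politician': {},
    'honolulu': {},
})

# expand one hop around the seed and print each (source, relation, target) triple
for _triple in summarize_neighborhood(_toy, seed='obama', max_depth=1):
    print _triple
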
def observe(self, maximizer, thresholds=None):
    if (maximizer.steps + 1) % self.report_wait != 0:
        return None

    samples = util.sample_if_large(self.examples, self.eval_samples, replace=True)

    # score
    samples = copy.deepcopy(samples)
    for ex in samples:
        try:
            ex.score = maximizer.objective.predict(maximizer.params, ex).ravel()[0]
        except KeyError:
            print 'out of vocab'
            ex.score = float('inf')

    if thresholds is None:
        thresholds = compute_best_thresholds(samples)
    acc = accuracy(thresholds, samples)
    return {('accuracy', 'test'): acc}

def mean_rank(self, maximizer, dset):
    sample = util.sample_if_large(dset, self.eval_samples)
    ranks = [self.rank(maximizer, ex) for ex in util.verboserate(sample)]
    return np.nanmean(ranks)

def final_evaluation(dataset_path, model_name, params_path, eval_type,
                     eval_samples=float('inf'), max_negative_samples=float('inf'),
                     type_matching_negs=True):
    dset = parse_dataset(dataset_path)
    model = CompositionalModel(None, path_model=model_name, objective='margin')
    params = load_params(params_path, model_name)
    neg_gen = NegativeGenerator(dset.full_graph, max_negative_samples,
                                type_matching_negs=type_matching_negs)
    queries = util.sample_if_large(dset.test, eval_samples, replace=False)

    # Define different evaluation functions
    # ----- ----- ----- ----- -----
    scores = lambda query: model.predict(params, query).ravel()

    def performance(query):
        s, r, t = query.s, query.r, query.t
        negatives = neg_gen(query, 't')
        pos_query = PathQuery(s, r, t)
        neg_query = PathQuery(s, r, negatives)

        # don't score queries with no negatives
        if len(negatives) == 0:
            query.quantile = np.nan
        else:
            query.quantile = util.average_quantile(scores(pos_query), scores(neg_query))

        query.num_candidates = len(negatives) + 1

        attributes = query.s, ','.join(query.r), query.t, str(query.quantile), str(query.num_candidates)
        return '\t'.join(attributes)

    def report(queries):
        # filter out NaNs
        queries = [q for q in queries if not np.isnan(q.quantile)]
        util.metadata('mean_quantile', np.mean([q.quantile for q in queries]))
        util.metadata('h10', np.mean([1.0 if util.rank_from_quantile(q.quantile, q.num_candidates) <= 10 else 0.0
                                      for q in queries]))

    def average_quantile(s, p):
        negatives, positives = neg_gen(PathQuery(s, p, ''), 't', return_positives=True)
        pos_query = PathQuery(s, p, positives)
        neg_query = PathQuery(s, p, negatives)
        return util.average_quantile(scores(pos_query), scores(neg_query))

    def intermediate_aqs(query):
        s, path = query.s, query.r
        aqs = []
        for length in 1 + np.arange(len(path)):
            p = path[:length]
            aq = average_quantile(s, p)
            aqs.append(aq)

        attributes = query.s, ','.join(query.r), query.t, ','.join(str(aq) for aq in aqs)
        return '\t'.join(attributes)

    # ----- ----- ----- ----- -----

    if eval_type == 'mean_quantile':
        eval_fxn = performance
        eval_report = report
    elif eval_type == 'intermediate_aqs':
        eval_fxn = intermediate_aqs
        eval_report = lambda qs: None
    else:
        raise ValueError(eval_type)

    with open('results.tsv', 'w') as f:
        def progress(steps, elapsed):
            print '{} of {} processed ({} s)'.format(steps, len(queries), elapsed)
            util.metadata('steps', steps)
            util.metadata('gb_used', util.gb_used())
            sys.stdout.flush()
            f.flush()

        for query in util.verboserate(queries, report=progress):
            s = eval_fxn(query)
            f.write(s)
            f.write('\n')

        eval_report(queries)

    with open('queries.cpkl', 'w') as f:
        pickle.dump(queries, f)

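# Hypothetical invocation sketch (the dataset path, model name, and params path
# are placeholders, not from the source): evaluates a subsample of test queries
# by mean quantile and writes per-query results to results.tsv and the scored
# queries to queries.cpkl in the working directory, as final_evaluation() above
# does.
final_evaluation('data/path_queries', 'bilinear', 'params/best_params.pkl',
                 'mean_quantile', eval_samples=1000, type_matching_negs=True)
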
def avg_steps(self, experiment, dset):
    sample = util.sample_if_large(dset, self.eval_samples)
    steps = [self.get_steps(experiment, goal) for goal in sample]
    return np.mean(steps)

def objective_mean(dset):
    sample = util.sample_if_large(dset, self.dset_samples)
    vals = [maximizer.objective.value(maximizer.params, ex) for ex in util.verboserate(sample)]
    return np.mean(vals)
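
# Every snippet above relies on util.sample_if_large, whose implementation is
# not shown here. The sketch below is an assumption, not the library's code: a
# plausible reading consistent with how it is called (a dataset, a maximum
# sample size that may be float('inf'), and an optional replace flag).
import random

def _sample_if_large_sketch(data, max_size, replace=True):
    # small datasets pass through untouched (also covers max_size=float('inf'))
    if len(data) <= max_size:
        return list(data)
    # otherwise draw max_size items, with or without replacement
    if replace:
        return [random.choice(data) for _ in range(max_size)]
    return random.sample(data, max_size)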