Example #1
def segmented_evaluation(file_path, categorize=None):
    queries = []
    with open(file_path, 'r') as f:
        for line in util.verboserate(f):
            items = line.split('\t')
            s, r, t = items[0], tuple(items[1].split(',')), items[2]
            q = PathQuery(s, r, t)
            quantile_str = items[3]
            q.quantile = float(quantile_str)
            q.num_candidates = int(items[4])
            queries.append(q)

    def single_relation(query):
        if len(query.r) != 1:
            return False
        r = query.r[-1]
        if inverted(r):
            return False
        return r

    # group queries
    if categorize is None:
        categorize = single_relation

    groups = util.group(queries, categorize)

    print 'computing grouped stats'
    stats = defaultdict(dict)
    for key, queries in util.verboserate(groups.iteritems()):
        scores = [q.quantile for q in queries]
        score = np.nanmean(scores)

        def inv_sigmoid(y):
            return -np.log(1.0 / y - 1)

        score2 = inv_sigmoid(score)

        total = len(scores)
        nontrivial = np.count_nonzero(~np.isnan(scores))

        stats[key] = {
            'score': score,
            'score2': score2,
            'total_eval': total,
            'nontrivial_eval': nontrivial
        }

    stats.pop(False, None)
    return pd.DataFrame(stats).transpose()
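
Every example on this page loops through util.verboserate, whose implementation is not shown. As a rough mental model it behaves like a progress-reporting wrapper around an iterable; the later final_evaluation examples also pass an optional report callback taking (steps, elapsed). A minimal sketch under those assumptions (the real helper may differ):

import time

def verboserate(iterable, period=5.0, report=None):
    """Hypothetical re-implementation of util.verboserate, inferred from usage.

    Yields items from `iterable` unchanged while periodically reporting
    progress, either through the optional report(steps, elapsed) callback
    or by printing a short status line.
    """
    start = time.time()
    last = start
    for steps, item in enumerate(iterable):
        now = time.time()
        if now - last >= period:
            elapsed = now - start
            if report is not None:
                report(steps, elapsed)  # steps = items fully processed so far
            else:
                print('{} items processed ({:.1f} s elapsed)'.format(steps, elapsed))
            last = now
        yield item
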
Example #2
    def run_experiment(self):
        print 'Stochastic Gradient Descent: Examples %d ' % len(self.train)

        self.steps = 0
        self.epochs = 0
        while True:
            # reshuffle training data
            train_copy = list(self.train)
            random.shuffle(train_copy)

            # TODO: Figure out minibatches/ accumulate gradients
            # TODO: Specific to current dataset format!
            for ex in util.verboserate(train_copy):
                self.model.backprop(ex.sentences, ex.mask, ex.question,
                                    ex.answer[0], ex.hints)

                for controller in self.controllers:
                    controller.control(self)

                self.track()
                self.steps += 1

            self.epochs += 1
            if self.halt:
                return
Example #3
    def get_examples(name):
        filename = join(data_path, name)
        if not isfile(filename):
            print 'Warning: ', filename, ' not found. Skipping...'
            return None

        examples_arr = list()
        with open(filename, 'r') as f:
            num_examples = 0
            for line in util.verboserate(f):
                if num_examples >= maximum_examples:
                    break
                items = line.split()
                s, path, t = items[:3]
                rels = tuple(path.split(','))
                entities.add(s)
                entities.add(t)
                relations.update(rels)

                if len(items) >= 4:
                    label = items[3]
                else:
                    label = '1'  # if no label, assume positive

                # only add positive examples
                if label == '1':
                    examples_arr.append(PathQuery(s, rels, t))
                    num_examples += 1

        return examples_arr
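
The snippets above and below build PathQuery(s, r, t) objects and freely attach extra attributes to them (quantile, num_candidates, label, aqs, ...). PathQuery itself is not shown; if you want to run these snippets standalone, a plain record class like the following stand-in (an assumption, not the project's actual definition) is enough:

class PathQuery(object):
    """Minimal stand-in for the PathQuery used in these examples (assumed):
    a source entity, a tuple of relations forming a path, and a target entity.
    Extra attributes (quantile, label, aqs, ...) are attached by the callers."""

    def __init__(self, s, r, t):
        self.s = s  # source entity
        self.r = r  # relation path, e.g. ('works_for', 'located_in')
        self.t = t  # target entity (or an array of candidate targets)

    def __repr__(self):
        return 'PathQuery({}, {}, {})'.format(self.s, self.r, self.t)
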
Example #4
 def objective_mean(dset):
     sample = util.sample_if_large(dset, self.dset_samples)
     vals = []
     for ex in util.verboserate(sample):
         vals.append(experiment.model.objective(ex.sentences,
                     ex.mask, ex.question, ex.answer[0], ex.hints))
     return np.mean(vals)
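
objective_mean and several later evaluation loops cap their cost with util.sample_if_large, which is also not shown. Judging from the call sites (some of which pass replace=False), it returns the dataset unchanged when it is small enough and a random sample otherwise. A sketch under that assumption:

import random

def sample_if_large(data, max_size, replace=True):
    """Assumed behaviour: return `data` as-is if it fits within `max_size`,
    otherwise return a random sample of `max_size` items (with or without
    replacement)."""
    if len(data) <= max_size:
        return list(data)
    if replace:
        return [random.choice(data) for _ in range(int(max_size))]
    return random.sample(data, int(max_size))
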
Example #5
def difference_evaluation(name):
    queries = []
    with open(join(data_directory, name + '.tsv'), 'r') as f:
        for line in util.verboserate(f):
            items = line.split('\t')
            s, r, t = items[0], tuple(items[1].split(',')), items[2]
            q = PathQuery(s, r, t)
            q.aqs = [float(s) for s in items[3].split(',')]
            queries.append(q)

    aq_deltas = defaultdict(list)
    for q in queries:
        aqs = [1.0] + q.aqs
        for i in range(1, len(aqs)):
            r = q.r[i - 1]
            aq, prev_aq = aqs[i], aqs[i - 1]

            if prev_aq == 1.0:
                delta = 1.0  # no ground to gain
            elif prev_aq == 0.0:
                delta = np.nan  # no ground to lose
            else:
                diff = aq - prev_aq
                if diff >= 0:
                    delta = diff / (1.0 - prev_aq)  # portion recovered
                else:
                    delta = diff / prev_aq  # portion lost

            if not np.isnan(delta):
                aq_deltas[r].append(delta)

    return pd.DataFrame({
        'mean(aq_diff)':
        dict((r, np.nanmean(deltas)) for r, deltas in aq_deltas.iteritems())
    })
Example #6
def difference_evaluation(name):
    queries = []
    with open(join(data_directory, name + '.tsv'), 'r') as f:
        for line in util.verboserate(f):
            items = line.split('\t')
            s, r, t = items[0], tuple(items[1].split(',')), items[2]
            q = PathQuery(s, r, t)
            q.aqs = [float(s) for s in items[3].split(',')]
            queries.append(q)

    aq_deltas = defaultdict(list)
    for q in queries:
        aqs = [1.0] + q.aqs
        for i in range(1, len(aqs)):
            r = q.r[i-1]
            aq, prev_aq = aqs[i], aqs[i-1]

            if prev_aq == 1.0:
                delta = 1.0  # no ground to gain
            elif prev_aq == 0.0:
                delta = np.nan  # no ground to lose
            else:
                diff = aq - prev_aq
                if diff >= 0:
                    delta = diff / (1.0 - prev_aq)  # portion recovered
                else:
                    delta = diff / prev_aq  # portion lost

            if not np.isnan(delta):
                aq_deltas[r].append(delta)

    return pd.DataFrame({'mean(aq_diff)': dict((r, np.nanmean(deltas)) for r, deltas in aq_deltas.iteritems())})
Example #7
def group_queries_by_difficulty(train_graph, full_graph, queries, existence=True, epsilon=5e-1):
    print "Filtering queries contained in train graph"
    easy_queries = []
    hard_queries = []
    for query in util.verboserate(queries):
        if existence:

            if isinstance(query, PathQuery) or isinstance(query, data.PathQuery):
                easy = query.t in train_graph.walk_all(query.s, query.r)
            else:
                raise TypeError(type(query))

            if easy:
                easy_queries.append(query)
            else:
                hard_queries.append(query)
        else:

            mc_estimates = train_graph.random_walk_probs(query.s, query.r)
            if query.t in mc_estimates:
                approx = mc_estimates[query.t]
            else:
                approx = 0.
            true = full_graph.random_walk_probs(query.s, query.r)[query.t]
            if abs(true - approx) < epsilon:
                easy_queries.append(query)
            else:
                hard_queries.append(query)

    print "Number of easy queries: ", len(easy_queries)
    print "Number of hard queries: ", len(hard_queries)
    return easy_queries, hard_queries
Example #8
    def get_examples(name):
        filename = join(data_path, name)
        if not isfile(filename):
            print 'Warning: ', filename, ' not found. Skipping...'
            return None

        examples_arr = list()
        with open(filename, 'r') as f:
            num_examples = 0
            for line in util.verboserate(f):
                if num_examples >= maximum_examples:
                    break
                items = line.split()
                s, path, t = items[:3]
                rels = tuple(path.split(','))
                entities.add(s)
                entities.add(t)
                relations.update(rels)

                if len(items) >= 4:
                    label = items[3]
                else:
                    label = '1'  # if no label, assume positive

                # only add positive examples
                if label == '1':
                    examples_arr.append(PathQuery(s, rels, t))
                    num_examples += 1

        return examples_arr
Example #9
            def accuracy_mean(dset):
                sample = util.sample_if_large(dset, self.dset_samples)

                vals = []
                for ex in util.verboserate(sample):
                    correct = ex.answer == experiment.model.predict(ex.sentences, ex.mask, ex.question)
                    vals.append(correct)
                return np.mean(vals)
Example #10
def segmented_evaluation(file_path, categorize=None):
    queries = []
    with open(file_path, 'r') as f:
        for line in util.verboserate(f):
            items = line.split('\t')
            s, r, t = items[0], tuple(items[1].split(',')), items[2]
            q = PathQuery(s, r, t)
            quantile_str = items[3]
            q.quantile = float(quantile_str)
            q.num_candidates = int(items[4])
            queries.append(q)

    def single_relation(query):
        if len(query.r) != 1:
            return False
        r = query.r[-1]
        if inverted(r):
            return False
        return r

    # group queries
    if categorize is None:
        categorize = single_relation

    groups = util.group(queries, categorize)

    print 'computing grouped stats'
    stats = defaultdict(dict)
    for key, queries in util.verboserate(groups.iteritems()):
        scores = [q.quantile for q in queries]
        score = np.nanmean(scores)

        def inv_sigmoid(y):
            return -np.log(1.0 / y - 1)

        score2 = inv_sigmoid(score)

        total = len(scores)
        nontrivial = np.count_nonzero(~np.isnan(scores))

        stats[key] = {'score': score, 'score2': score2, 'total_eval': total, 'nontrivial_eval': nontrivial}

    stats.pop(False, None)
    return pd.DataFrame(stats).transpose()
Example #11
def compute_best_thresholds(examples, debug=False):
    # per-relation thresholds
    ex_by_rel = util.group(examples, lambda q: q.r[0])
    thresholds = {}
    for relation, examples_r in util.verboserate(ex_by_rel.items()):
        if debug:
            print relation
        scores = [ex.score for ex in examples_r]
        labels = [ex.label for ex in examples_r]
        thresholds[relation] = util.best_threshold(scores, labels, debug)

    return thresholds
Example #12
def compute_best_thresholds(examples, debug=False):
    # per-relation thresholds
    ex_by_rel = util.group(examples, lambda q: q.r[0])
    thresholds = {}
    for relation, examples_r in util.verboserate(ex_by_rel.items()):
        if debug:
            print relation
        scores = [ex.score for ex in examples_r]
        labels = [ex.label for ex in examples_r]
        thresholds[relation] = util.best_threshold(scores, labels, debug)

    return thresholds
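
The segmented_evaluation and compute_best_thresholds examples lean on two more helpers, util.group and util.best_threshold. Their behaviour can be read off the call sites: group buckets items by a key function, and best_threshold picks a decision threshold from scores and boolean labels. A rough sketch under those assumptions (the real best_threshold may use a different selection criterion):

from collections import defaultdict

def group(items, key_fn):
    """Assumed: bucket `items` into a dict mapping key_fn(item) -> list of items."""
    groups = defaultdict(list)
    for item in items:
        groups[key_fn(item)].append(item)
    return dict(groups)

def best_threshold(scores, labels, debug=False):
    """Assumed: choose the threshold on `scores` that maximizes accuracy of
    the rule (score >= threshold) against the boolean `labels`."""
    candidates = sorted(set(scores))
    best, best_acc = candidates[0], -1.0
    for thresh in candidates:
        preds = [s >= thresh for s in scores]
        acc = sum(p == l for p, l in zip(preds, labels)) / float(len(labels))
        if acc > best_acc:
            best, best_acc = thresh, acc
        if debug:
            print('threshold {} -> accuracy {:.3f}'.format(thresh, acc))
    return best
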
Example #13
def satisfying_pairs(p, graph):
    pairs = set()

    sources = graph.type_matching_entities(p, 's')

    for s in util.verboserate(sources):
        if len(p) == 1:
            for t in graph.neighbors[s][p[0]]:
                pairs.add((s, t))
        else:
            for t in graph.walk_all(s, p):
                pairs.add((s, t))
    return pairs
Example #14
def satisfying_pairs(p, graph):
    pairs = set()

    sources = graph.type_matching_entities(p, 's')

    for s in util.verboserate(sources):
        if len(p) == 1:
            for t in graph.neighbors[s][p[0]]:
                pairs.add((s, t))
        else:
            for t in graph.walk_all(s, p):
                pairs.add((s, t))
    return pairs
Example #15
 def stats(pqs):
     ents = Counter()
     rels = Counter()
     paths = Counter()
     lengths = Counter()
     for pq in util.verboserate(pqs):
         ents[pq.s] += 1
         ents[pq.t] += 1
         path = pq.r
         paths[path] += 1
         lengths[len(path)] += 1
         for r in path:
             rels[r] += 1
     return ents, rels, paths, lengths
Example #16
 def stats(pqs):
     ents = Counter()
     rels = Counter()
     paths = Counter()
     lengths = Counter()
     for pq in util.verboserate(pqs):
         ents[pq.s] += 1
         ents[pq.t] += 1
         path = pq.r
         paths[path] += 1
         lengths[len(path)] += 1
         for r in path:
             rels[r] += 1
     return ents, rels, paths, lengths
Example #17
def get_questions_stats(train_data_file, dev_data_file):
    print('1. Getting the number of blanks')

    blank_str = '_blank_'
    num_blanks_map = defaultdict(int)
    word_freq_train = defaultdict(int)
    with open(train_data_file) as train_file:
        for counter, line in enumerate(util.verboserate(train_file)):
            line = line.strip()
            q_json = json.loads(line)
            q = q_json['sentence']
            count = q.count(blank_str)
            num_blanks_map[count] += 1
            words = q.split(' ')
            for word in words:
                word = word.strip()
                word_freq_train[word] += 1
            a_list = q_json['answerSubset']
            for a in a_list:
                word_freq_train[a] += 1

    print(num_blanks_map)

    print '2. Number of word types in the train set {}'.format(
        len(word_freq_train))

    print '3. Checking overlap with the dev answers'
    dev_answers_present = set()
    dev_answers_oov = set()
    dev_answers = set()
    with open(dev_data_file) as dev_file:
        for line in dev_file:
            line = line.strip()
            dev_json = json.loads(line)
            a_list = dev_json['answerSubset']
            for a in a_list:
                if a in word_freq_train:
                    dev_answers_present.add(a)
                else:
                    dev_answers_oov.add(a)
                dev_answers.add(a)

    print 'Number of unique dev answer strings {}'.format(len(dev_answers))

    print 'Number of oov answer strings in dev set {}'.format(
        len(dev_answers_oov))

    print 'Number of dev answer strings which have at least 1 occurrence in train set {}'.format(
        len(dev_answers_present))
Example #18
def load_socher_test(test_set_path):
    examples = []
    with open(join(data_directory, test_set_path), 'r') as f:
        for line in util.verboserate(f):
            items = line.split()
            s, r, t, label = items[0], tuple(items[1].split(',')), items[2], items[3]
            ex = PathQuery(s, r, t)

            if label == '1':
                ex.label = True
            elif label == '-1':
                ex.label = False
            else:
                raise ValueError(label)
            examples.append(ex)
    return examples
Example #19
def load_socher_test(test_set_path):
    examples = []
    with open(join(data_directory, test_set_path), 'r') as f:
        for line in util.verboserate(f):
            items = line.split()
            s, r, t, label = items[0], tuple(
                items[1].split(',')), items[2], items[3]
            ex = PathQuery(s, r, t)

            if label == '1':
                ex.label = True
            elif label == '-1':
                ex.label = False
            else:
                raise ValueError(label)
            examples.append(ex)
    return examples
Example #20
def group_queries_by_difficulty(train_graph,
                                full_graph,
                                queries,
                                existence=True,
                                epsilon=5e-1):
    print "Filtering queries contained in train graph"
    easy_queries = []
    hard_queries = []
    for query in util.verboserate(queries):
        if existence:

            if isinstance(query, PathQuery) or isinstance(
                    query, data.PathQuery):
                easy = query.t in train_graph.walk_all(query.s, query.r)
            else:
                raise TypeError(type(query))

            if easy:
                easy_queries.append(query)
            else:
                hard_queries.append(query)
        else:

            mc_estimates = train_graph.random_walk_probs(query.s, query.r)
            if query.t in mc_estimates:
                approx = mc_estimates[query.t]
            else:
                approx = 0.
            true = full_graph.random_walk_probs(query.s, query.r)[query.t]
            if abs(true - approx) < epsilon:
                easy_queries.append(query)
            else:
                hard_queries.append(query)

    print "Number of easy queries: ", len(easy_queries)
    print "Number of hard queries: ", len(hard_queries)
    return easy_queries, hard_queries
Example #21
def final_evaluation(dataset_path, model_name, params_path, eval_type, eval_samples=float('inf'),
                     max_negative_samples=float('inf'), type_matching_negs=True):
    dset = parse_dataset(dataset_path)
    model = CompositionalModel(None, path_model=model_name, objective='margin')
    params = load_params(params_path, model_name)
    neg_gen = NegativeGenerator(dset.full_graph, max_negative_samples, type_matching_negs=type_matching_negs)

    queries = util.sample_if_large(dset.test, eval_samples, replace=False)

    # Define different evaluation functions
    # ----- ----- ----- ----- -----
    scores = lambda query: model.predict(params, query).ravel()

    def performance(query):
        s, r, t = query.s, query.r, query.t
        negatives = neg_gen(query, 't')
        pos_query = PathQuery(s, r, t)
        neg_query = PathQuery(s, r, negatives)

        # don't score queries with no negatives
        if len(negatives) == 0:
            query.quantile = np.nan
        else:
            query.quantile = util.average_quantile(scores(pos_query), scores(neg_query))

        query.num_candidates = len(negatives) + 1

        attributes = query.s, ','.join(query.r), query.t, str(query.quantile), str(query.num_candidates)
        return '\t'.join(attributes)

    def report(queries):
        # filter out NaNs
        queries = [q for q in queries if not np.isnan(q.quantile)]
        util.metadata('mean_quantile', np.mean([q.quantile for q in queries]))
        util.metadata('h10', np.mean([1.0 if util.rank_from_quantile(q.quantile, q.num_candidates) <= 10 else 0.0 for q in queries]))

    def average_quantile(s, p):
        negatives, positives = neg_gen(PathQuery(s, p, ''), 't', return_positives=True)
        pos_query = PathQuery(s, p, positives)
        neg_query = PathQuery(s, p, negatives)
        return util.average_quantile(scores(pos_query), scores(neg_query))

    def intermediate_aqs(query):
        s, path = query.s, query.r
        aqs = []
        for length in 1 + np.arange(len(path)):
            p = path[:length]
            aq = average_quantile(s, p)
            aqs.append(aq)

        attributes = query.s, ','.join(query.r), query.t, ','.join(str(aq) for aq in aqs)
        return '\t'.join(attributes)

    # ----- ----- ----- ----- -----

    if eval_type == 'mean_quantile':
        eval_fxn = performance
        eval_report = report
    elif eval_type == 'intermediate_aqs':
        eval_fxn = intermediate_aqs
        eval_report = lambda qs: None
    else:
        raise ValueError(eval_type)

    with open('results.tsv', 'w') as f:

        def progress(steps, elapsed):
            print '{} of {} processed ({} s)'.format(steps, len(queries), elapsed)
            util.metadata('steps', steps)
            util.metadata('gb_used', util.gb_used())
            sys.stdout.flush()
            f.flush()

        for query in util.verboserate(queries, report=progress):
            s = eval_fxn(query)
            f.write(s)
            f.write('\n')

    eval_report(queries)

    with open('queries.cpkl', 'w') as f:
        pickle.dump(queries, f)
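
The mean-quantile evaluation above depends on util.average_quantile and util.rank_from_quantile, which are not shown either. One plausible reading, consistent with how they are called: the quantile of a positive score is the fraction of negative scores it beats (ties counted as half), averaged over all positives, and a quantile can be mapped back to an approximate 1-based rank among num_candidates. Treat the formulas below as assumptions rather than the project's exact definitions:

import numpy as np

def average_quantile(positive_scores, negative_scores):
    """Assumed: for each positive score, the fraction of negatives it outranks
    (counting ties as half), averaged over the positives. 1.0 = beats all negatives."""
    neg = np.asarray(negative_scores, dtype=float)
    quantiles = []
    for pos in np.ravel(positive_scores):
        below = np.sum(neg < pos)
        ties = np.sum(neg == pos)
        quantiles.append((below + 0.5 * ties) / float(len(neg)))
    return float(np.mean(quantiles))

def rank_from_quantile(quantile, num_candidates):
    """Assumed: map a quantile back to a 1-based rank among num_candidates,
    so quantile 1.0 -> rank 1 and quantile 0.0 -> rank num_candidates."""
    return 1.0 + (1.0 - quantile) * (num_candidates - 1)
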
Example #22
def final_evaluation(dataset_path,
                     model_name,
                     params_path,
                     eval_type,
                     eval_samples=float('inf'),
                     max_negative_samples=float('inf'),
                     type_matching_negs=True):
    dset = parse_dataset(dataset_path)
    model = CompositionalModel(None, path_model=model_name, objective='margin')
    params = load_params(params_path, model_name)
    neg_gen = NegativeGenerator(dset.full_graph,
                                max_negative_samples,
                                type_matching_negs=type_matching_negs)

    queries = util.sample_if_large(dset.test, eval_samples, replace=False)

    # Define different evaluation functions
    # ----- ----- ----- ----- -----
    scores = lambda query: model.predict(params, query).ravel()

    def performance(query):
        s, r, t = query.s, query.r, query.t
        negatives = neg_gen(query, 't')
        pos_query = PathQuery(s, r, t)
        neg_query = PathQuery(s, r, negatives)

        # don't score queries with no negatives
        if len(negatives) == 0:
            query.quantile = np.nan
        else:
            query.quantile = util.average_quantile(scores(pos_query),
                                                   scores(neg_query))

        query.num_candidates = len(negatives) + 1

        attributes = query.s, ','.join(query.r), query.t, str(
            query.quantile), str(query.num_candidates)
        return '\t'.join(attributes)

    def report(queries):
        # filter out NaNs
        queries = [q for q in queries if not np.isnan(q.quantile)]
        util.metadata('mean_quantile', np.mean([q.quantile for q in queries]))
        util.metadata(
            'h10',
            np.mean([
                1.0
                if util.rank_from_quantile(q.quantile, q.num_candidates) <= 10
                else 0.0 for q in queries
            ]))

    def average_quantile(s, p):
        negatives, positives = neg_gen(PathQuery(s, p, ''),
                                       't',
                                       return_positives=True)
        pos_query = PathQuery(s, p, positives)
        neg_query = PathQuery(s, p, negatives)
        return util.average_quantile(scores(pos_query), scores(neg_query))

    def intermediate_aqs(query):
        s, path = query.s, query.r
        aqs = []
        for length in 1 + np.arange(len(path)):
            p = path[:length]
            aq = average_quantile(s, p)
            aqs.append(aq)

        attributes = query.s, ','.join(query.r), query.t, ','.join(
            str(aq) for aq in aqs)
        return '\t'.join(attributes)

    # ----- ----- ----- ----- -----

    if eval_type == 'mean_quantile':
        eval_fxn = performance
        eval_report = report
    elif eval_type == 'intermediate_aqs':
        eval_fxn = intermediate_aqs
        eval_report = lambda qs: None
    else:
        raise ValueError(eval_type)

    with open('results.tsv', 'w') as f:

        def progress(steps, elapsed):
            print '{} of {} processed ({} s)'.format(steps, len(queries),
                                                     elapsed)
            util.metadata('steps', steps)
            util.metadata('gb_used', util.gb_used())
            sys.stdout.flush()
            f.flush()

        for query in util.verboserate(queries, report=progress):
            s = eval_fxn(query)
            f.write(s)
            f.write('\n')

    eval_report(queries)

    with open('queries.cpkl', 'w') as f:
        pickle.dump(queries, f)
Example #23
 def mean_rank(self, maximizer, dset):
     sample = util.sample_if_large(dset, self.eval_samples)
     ranks = [self.rank(maximizer, ex) for ex in util.verboserate(sample)]
     return np.nanmean(ranks)
Example #24
 def accuracy_mean(dset):
     vals = []
     for ex in util.verboserate(dset):
         correct = ex.answer == experiment.model.predict(ex.sentences, ex.mask, ex.question)
         vals.append(correct)
     return np.mean(vals)
Example #25
def sample_paths(graph, num_paths, max_path_length):
    paths = []
    for k in util.verboserate(range(num_paths)):
        length = random.randint(2, max_path_length)  # don't include length 1
        paths.append(graph.random_path_query(length))
    return paths
Example #26
def sample_paths(graph, num_paths, max_path_length):
    paths = []
    for k in util.verboserate(range(num_paths)):
        length = random.randint(2, max_path_length)  # don't include length 1
        paths.append(graph.random_path_query(length))
    return paths
Example #27
 def objective_mean(dset):
     sample = util.sample_if_large(dset, self.dset_samples)
     vals = [maximizer.objective.value(maximizer.params, ex) for ex in util.verboserate(sample)]
     return np.mean(vals)
Example #28
 def mean_rank(self, maximizer, dset):
     sample = util.sample_if_large(dset, self.eval_samples)
     ranks = [self.rank(maximizer, ex) for ex in util.verboserate(sample)]
     return np.nanmean(ranks)
Example #29
 def objective_mean(dset):
     sample = util.sample_if_large(dset, self.dset_samples)
     vals = [maximizer.objective.value(maximizer.params, ex) for ex in util.verboserate(sample)]
     return np.mean(vals)