Example 1
    def do_predict_args(self, line, **kwargs):
        entities, events, line = ModelShell.parse_entities_and_event(line)
        print "Predicting args for predicate %s" % self.model.get_model_predicate_repr(
            entities[0], events[0])
        arg_predictions = self.model.predict_args(entities[0],
                                                  events[0],
                                                  limit=10)
        for arg_num, predictions in enumerate(arg_predictions):
            print "Predictions for arg %d:" % arg_num
            for word, score in predictions:
                print "  %s (%f)" % (word, score)
Example 2
    def do_repr(self, line, **kwargs):
        entities, events, line = ModelShell.parse_event_context(line)
        # Get string representation of events
        chain_words = list(self.model.word_generator([(entities[0], events)
                                                      ]))[0]

        for word in chain_words:
            if word in self.model.word2vec:
                print word
            else:
                print "Not in vocab: %s" % word
Example 3
    def do_repr(self, line, **kwargs):
        from whim.entity_narrative import MikolovVerbNarrativeChainModel

        entities, events, line = ModelShell.parse_event_context(line)
        # Get string representation of events
        chain_words = list(MikolovVerbNarrativeChainModel.extract_chain_word_lists([(entities[0], events)]))[0]

        for word in chain_words:
            if word in self.model.word2vec:
                print word
            else:
                print "Not in vocab: %s" % word
Example 4
    def do_neighbours(self, line, **kwargs):
        from whim.entity_narrative import MikolovVerbNarrativeChainModel

        entities, events, line = ModelShell.parse_event_context(line)
        if line.strip():
            print "Ignoring remainder of input: %s" % line

        # Get string representation of events
        chain_words = list(MikolovVerbNarrativeChainModel.extract_chain_word_lists([(entities[0], events)]))[0]
        if self.model.model_options["cosmul"]:
            neighbours = self.model.word2vec.most_similar_cosmul(positive=chain_words)
        else:
            neighbours = self.model.word2vec.most_similar(positive=chain_words)

        for predicate, sim in neighbours:
            print "%s  (%g)" % (predicate, sim)
Example 5
    def do_hash(self, line, **kwargs):
        """
        Like predict_next, but instead of directly sampling the next event's surface vector it samples
        its deepest projection and then finds nearest neighbours to that.

        """
        port = int(kwargs.pop("redis_port", 6379))

        # Load a nearest neighbour finder for the model: will fail if one's not been created
        finder = self.model.get_nearest_neighbour_finder(self.model_name,
                                                         port=port)
        # Read an event from the shell input
        entities, events, __ = ModelShell.parse_entities_and_events(
            line, max_events=1)
        # Project this event into the event space
        projection = self.model.project_events([(entities[0], events[0])
                                                ])[0, :]
        print finder.hash.hash_vector(projection)
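finder.hash.hash_vector is not defined in these excerpts; from its use it looks like a locality-sensitive hash over the projected event. A minimal, hypothetical random-hyperplane (cosine) LSH with the same interface might look like the following; the real finder's hashing scheme may well differ.

    import numpy

    class RandomHyperplaneHash(object):
        """Hypothetical cosine LSH: one bit per random hyperplane."""

        def __init__(self, num_bits, dim, seed=0):
            self.planes = numpy.random.RandomState(seed).randn(num_bits, dim)

        def hash_vector(self, vector):
            # Vectors falling on the same side of every hyperplane share a key,
            # which is likely for vectors with high cosine similarity
            bits = (self.planes.dot(vector) >= 0).astype(int)
            return "".join(str(b) for b in bits)

    hasher = RandomHyperplaneHash(num_bits=16, dim=100)
    print(hasher.hash_vector(numpy.random.RandomState(1).randn(100)))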
Example 6
    def do_predict_next(self, line, **kwargs):
        """
        Predict the next event given a context chain

        iterations: number of sampling iterations (default 100)
        pred, arg0, arg1, arg2: fix the predicate/arg0/arg1/arg2 to be a given word and sample the other components
        unique_pred: only show one instance of each predicate

        """
        iterations = int(kwargs.pop("its", 100))
        given_pred = kwargs.pop("pred", None)
        given_arg0 = kwargs.pop("arg0", None)
        given_arg1 = kwargs.pop("arg1", None)
        given_arg2 = kwargs.pop("arg2", None)
        unique_pred = str_to_bool(kwargs.pop("unique_pred", False))

        entities, events, __ = ModelShell.parse_entities_and_events(line)

        sampler = NextEventSampler(self.model,
                                   pred_given=given_pred,
                                   arg0_given=given_arg0,
                                   arg1_given=given_arg1,
                                   arg2_given=given_arg2)
        # Get 50 events, to allow the rescoring to reject rubbish ones
        scored_events = sampler.sample_next_input_events(
            (entities[0], events), 50, max_iterations=iterations, rescore=True)

        if unique_pred:
            # Filter events that have the same predicate as a prediction we've already made
            def _filter_unique_predicates(it):
                seen_verbs_uniq = set()
                for e in it:
                    if e[0].verb_lemma not in seen_verbs_uniq:
                        yield e
                        # Don't yield events with this verb in future
                        seen_verbs_uniq.add(e[0].verb_lemma)

            scored_events = list(_filter_unique_predicates(scored_events))

        for event, score in scored_events[:5]:
            print event, score
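str_to_bool, used to parse the unique_pred option above, is not shown in these excerpts. Note that the call passes a boolean default (False) when the option is absent, so a plausible hypothetical implementation also has to accept booleans:

    def str_to_bool(value):
        # Hypothetical helper: accept booleans (the default) as well as common string forms
        if isinstance(value, bool):
            return value
        return str(value).strip().lower() in ("1", "true", "yes", "y", "on")

    print("%s %s %s" % (str_to_bool("yes"), str_to_bool(False), str_to_bool("0")))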
Example 7
    def do_predict_next2(self, line, **kwargs):
        """
        Like predict_next, but instead of directly sampling the next event's surface vector it samples
        its deepest projection and then finds nearest neighbours to that.

        """
        iterations = int(kwargs.pop("its", 100))
        entities, events, __ = ModelShell.parse_entities_and_events(line)

        try:
            sampler = self.env["sampler"]
        except KeyError:
            print "No predictor has been prepared. Use load_predictor to prepare one"
            return

        scored_events = list(
            sampler.sample_next_input_events((entities[0], events),
                                             max_iterations=iterations))
        scored_events.sort(key=itemgetter(1), reverse=True)
        for event, score in scored_events[:10]:
            print event, score
Example 8
    def do_neighbours(self, line, **kwargs):
        entities, events, line = ModelShell.parse_event_context(line)
        if line.strip():
            print "Ignoring remainder of input: %s" % line

        preds = [predicate_relation(entities[0], event) for event in events]

        # Score all events in the vocabulary
        pmis = list(
            reversed(
                sorted([(vocab_ev,
                         sum(
                             self.model.pmi(vocab_ev, context_ev)
                             for context_ev in preds))
                        for vocab_ev in self.model.event_counts.keys()],
                       key=itemgetter(1))))

        for event, score in pmis[:10]:
            if score == 0.:
                break
            print event, score
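self.model.pmi is not shown here, but the ranking above is standard: each vocabulary event is scored by the sum of its pointwise mutual information with the context predicates. A count-based PMI along these lines (names and data are hypothetical; the model's version may discount counts or use a different log base):

    import math

    def pmi(pair_counts, event_counts, total_pairs, ev_a, ev_b):
        # PMI(a, b) = log( P(a, b) / (P(a) * P(b)) ); 0 if the pair was never seen
        joint = pair_counts.get((ev_a, ev_b), 0)
        if joint == 0:
            return 0.0
        total_events = float(sum(event_counts.values()))
        p_joint = joint / float(total_pairs)
        p_a = event_counts[ev_a] / total_events
        p_b = event_counts[ev_b] / total_events
        return math.log(p_joint / (p_a * p_b))

    event_counts = {"eat:subj": 4, "cook:subj": 3, "sleep:subj": 5}
    pair_counts = {("eat:subj", "cook:subj"): 3}
    print(pmi(pair_counts, event_counts, 10, "eat:subj", "cook:subj"))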
Example 9
    def do_neighbours(self, line, **kwargs):
        """
        Like predict_next, but instead of directly sampling the next event's surface vector it samples
        its deepest projection and then finds nearest neighbours to that.

        """
        port = int(kwargs.pop("redis_port", 6379))
        max_num = int(kwargs.pop("max", 10))

        # Load a nearest neighbour finder for the model: will fail if one's not been created
        finder = self.model.get_nearest_neighbour_finder(self.model_name,
                                                         port=port)
        # Read an event from the shell input
        entities, events, __ = ModelShell.parse_entities_and_events(
            line, max_events=1)
        # Project this event into the event space
        projection = self.model.project_events([(entities[0], events[0])
                                                ])[0, :]
        # Search for neighbours to this projected event
        for (entity, event), source, result_vector, score in islice(
                finder.predict_next_event(projection), max_num):
            print event.to_string_entity_text({entity: "X"}), score
Example 10
    def do_neighbours(self, line, **kwargs):
        limit = 10
        entities, events, line = ModelShell.parse_event_context(
            line, allow_arg_only=True)
        if line.strip():
            print "Ignoring remainder of input: %s" % line
        predicates_only = "pred" in kwargs

        arg_words = [
            "arg:%s" % event[1] for event in events if type(event) is tuple
        ]
        events = [e for e in events if type(e) is not tuple]

        # Get string representation of events
        chain_words = list(self.model.word_generator([(entities[0], events)
                                                      ]))[0] + arg_words
        if self.model.model_options["cosmul"]:
            dists = self.model.word2vec.most_similar_cosmul(
                positive=chain_words, topn=None)
        else:
            dists = self.model.word2vec.most_similar(positive=chain_words,
                                                     topn=None)

        best = numpy.argsort(dists)[::-1]
        returned = 0
        for index in best:
            word = self.model.word2vec.index2word[index]

            if word in chain_words:
                # Ignore (don't return) words from the input
                continue
            if predicates_only and word.startswith("arg:"):
                continue

            print "%s  (%g)" % (word, dists[index])
            returned += 1
            if returned >= limit:
                break
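The manual ranking above works because gensim's most_similar and most_similar_cosmul, when called with topn=None, return the similarity score for every word in the vocabulary as a single array rather than a ranked (word, score) list. The same argsort-and-filter pattern on a toy array, with hypothetical vocabulary entries:

    import numpy

    vocabulary = ["eat:subj", "arg:dinner", "cook:subj", "sleep:subj"]
    dists = numpy.array([0.91, 0.74, 0.70, 0.12])   # toy similarity scores
    chain_words = ["eat:subj"]                      # input words to skip
    limit, returned = 2, 0

    for index in numpy.argsort(dists)[::-1]:        # highest score first
        word = vocabulary[index]
        if word in chain_words or word.startswith("arg:"):
            continue
        print("%s  (%g)" % (word, dists[index]))
        returned += 1
        if returned >= limit:
            break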
Example 11
    def do_neighbours(self, line, **kwargs):
        entities, events, line = ModelShell.parse_event_context(line)
        if line.strip():
            print "Ignoring remainder of input: %s" % line

        # Get the vector projection of the events
        projection = self.model.project_chains([(entities[0], events)])[0]
        # Get similarity of this to each of the vectors
        # Normalize
        projection /= numpy.sqrt((projection**2.).sum())
        # Vectors may not be normalized
        vectors = self.model.vectors
        # Note: masking does not suppress the division warnings here; left unresolved for now
        vectors_norms = numpy.ma.masked_equal(
            numpy.ma.masked_invalid(
                numpy.sqrt((vectors**2.).sum(axis=1))[:, numpy.newaxis]), 0.)
        vectors /= vectors_norms
        # Now the cosine is just the dot product
        scores = numpy.dot(vectors, projection)
        scores = numpy.ma.masked_invalid(scores)

        neighbours = list(reversed(scores.argsort(fill_value=0.)))
        for neighbour_id in neighbours[:10]:
            print self.model.dictionary[neighbour_id], scores[neighbour_id]
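The neighbour search above is a plain cosine similarity: both the chain projection and the model's vectors are length-normalised, after which the cosine is just a dot product, with invalid and zero-norm rows masked out. The same computation on toy data, without the masking:

    import numpy

    vectors = numpy.random.RandomState(1).randn(5, 16)   # toy "event" vectors
    projection = numpy.random.RandomState(2).randn(16)   # toy chain projection

    # Normalise both sides; cosine similarity then reduces to a dot product
    projection = projection / numpy.sqrt((projection ** 2).sum())
    norms = numpy.sqrt((vectors ** 2).sum(axis=1))[:, numpy.newaxis]
    scores = numpy.dot(vectors / norms, projection)

    for neighbour_id in scores.argsort()[::-1][:3]:
        print("vector %d: %f" % (neighbour_id, scores[neighbour_id]))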
Example 12
    def do_predict(self, line, **kwargs):
        entities, events, line = ModelShell.parse_event_context(line)
        # Make ordered predictions
        predictions = self.model.predict_next_event(entities[0], events)
        for event, score in islice(predictions, 10):
            print event, score
Example 13
    def do_test(self, line, **kwargs):
        entities, events, line = ModelShell.parse_entities_and_event(line)
        pred = self.model.get_model_predicate_repr(entities[0], events[0])
        print "Testing reconstruction for predicate %s" % pred
        pred_index = self.model.pred_vocab[pred].index
        self.model.projection_model.test([pred_index], [-1], [-1], [-1])
Example 14
    def do_project_chain(self, line, **kwargs):
        entities, events, line = ModelShell.parse_event_context(line)
        # Get the model's projection of these events as a single chain
        vectors = self.model.project_chains([(entities[0], events)])
        print vectors[0]
Example 15
    def do_project(self, line, **kwargs):
        entities, events, line = ModelShell.parse_event_context(line)
        # Get the model's projection of these events
        vectors = self.model.project_events([(entities[0], e) for e in events])
        for event, vector in zip(events, vectors):
            print "%s: %s" % (event, vector)