def do_predict_args(self, line, **kwargs):
    """
    Print the model's argument predictions for a single input event.

    The input line is parsed with ModelShell.parse_entities_and_event and only
    the first entity and the first event are used. predict_args is called with
    limit=10 and every (word, score) pair it returns is printed, grouped by
    argument number.

    """
    entities, events, _remainder = ModelShell.parse_entities_and_event(line)
    entity, event = entities[0], events[0]

    predicate_repr = self.model.get_model_predicate_repr(entity, event)
    print("Predicting args for predicate %s" % predicate_repr)

    per_arg_predictions = self.model.predict_args(entity, event, limit=10)
    for arg_num, candidates in enumerate(per_arg_predictions):
        print("Predictions for arg %d:" % arg_num)
        for word, score in candidates:
            print(" %s (%f)" % (word, score))
def do_repr(self, line, **kwargs):
    """
    Print the words the model generates for the input chain, one per line.

    Words that are not found in the model's word2vec are printed with a
    "Not in vocab" prefix instead.

    """
    entities, events, _remainder = ModelShell.parse_event_context(line)

    # The word generator takes a list of chains: we supply exactly one
    # and so only look at the first word list it produces
    chains = [(entities[0], events)]
    chain_words = list(self.model.word_generator(chains))[0]

    for word in chain_words:
        if word not in self.model.word2vec:
            print("Not in vocab: %s" % word)
        else:
            print(word)
def do_repr(self, line, **kwargs):
    """
    Print the chain words extracted for the input chain, one per line.

    The words come from MikolovVerbNarrativeChainModel.extract_chain_word_lists.
    Words that are not found in the model's word2vec are printed with a
    "Not in vocab" prefix instead.

    """
    from whim.entity_narrative import MikolovVerbNarrativeChainModel

    entities, events, _remainder = ModelShell.parse_event_context(line)

    # The extractor takes a list of chains: we supply exactly one
    # and so only look at the first word list it produces
    chains = [(entities[0], events)]
    word_lists = MikolovVerbNarrativeChainModel.extract_chain_word_lists(chains)
    chain_words = list(word_lists)[0]

    for word in chain_words:
        if word not in self.model.word2vec:
            print("Not in vocab: %s" % word)
        else:
            print(word)
def do_neighbours(self, line, **kwargs):
    """
    Print the word2vec neighbours of the input chain's words.

    The chain words are used together as the positive examples of a
    similarity query. The model option "cosmul" selects between
    most_similar_cosmul and most_similar.

    """
    from whim.entity_narrative import MikolovVerbNarrativeChainModel

    entities, events, remainder = ModelShell.parse_event_context(line)
    if remainder.strip():
        print("Ignoring remainder of input: %s" % remainder)

    # The extractor takes a list of chains: we supply exactly one
    # and so only look at the first word list it produces
    chains = [(entities[0], events)]
    word_lists = MikolovVerbNarrativeChainModel.extract_chain_word_lists(chains)
    chain_words = list(word_lists)[0]

    # Pick the similarity query according to the model's options
    if self.model.model_options["cosmul"]:
        similarity_query = self.model.word2vec.most_similar_cosmul
    else:
        similarity_query = self.model.word2vec.most_similar

    for predicate, sim in similarity_query(positive=chain_words):
        print("%s (%g)" % (predicate, sim))
def do_hash(self, line, **kwargs):
    """
    Print the nearest neighbour finder's hash of a single input event.

    The event is projected using the model's project_events and the resulting
    vector is given to the finder's hash (finder.hash.hash_vector).

    redis_port: port passed on when loading the nearest neighbour finder
        (default 6379)

    """
    redis_port = int(kwargs.pop("redis_port", 6379))
    # The finder has to exist already for this model: loading will fail if one's not been created
    finder = self.model.get_nearest_neighbour_finder(self.model_name, port=redis_port)

    # Only one event is read from the shell input
    entities, events, __ = ModelShell.parse_entities_and_events(line, max_events=1)

    # project_events works on a list of (entity, event) pairs: take the first row of the result
    projected = self.model.project_events([(entities[0], events[0])])
    projection = projected[0, :]

    print(finder.hash.hash_vector(projection))
def do_predict_next(self, line, **kwargs):
    """
    Predict the next event given a context chain

    its: maximum number of sampling iterations (default 100)
    pred, arg0, arg1, arg2: fix the predicate/arg0/arg1/arg2 to be a given word and sample
        the other components
    unique_pred: only show one instance of each predicate

    50 events are requested from the sampler, with rescoring, and the
    first 5 of those that remain are printed with their scores.

    """
    max_iterations = int(kwargs.pop("its", 100))
    fixed_pred = kwargs.pop("pred", None)
    fixed_arg0 = kwargs.pop("arg0", None)
    fixed_arg1 = kwargs.pop("arg1", None)
    fixed_arg2 = kwargs.pop("arg2", None)
    unique_pred = str_to_bool(kwargs.pop("unique_pred", False))

    entities, events, __ = ModelShell.parse_entities_and_events(line)
    sampler = NextEventSampler(
        self.model,
        pred_given=fixed_pred,
        arg0_given=fixed_arg0,
        arg1_given=fixed_arg1,
        arg2_given=fixed_arg2,
    )
    # Ask for many more events than get shown, to allow the rescoring to reject rubbish ones
    context = (entities[0], events)
    scored_events = sampler.sample_next_input_events(
        context, 50, max_iterations=max_iterations, rescore=True
    )

    if unique_pred:
        # Keep only the first event seen for each verb lemma
        seen_lemmas = set()
        first_per_verb = []
        for scored_event in scored_events:
            lemma = scored_event[0].verb_lemma
            if lemma in seen_lemmas:
                continue
            seen_lemmas.add(lemma)
            first_per_verb.append(scored_event)
        scored_events = first_per_verb

    for event, score in scored_events[:5]:
        print("%s %s" % (event, score))
def do_predict_next2(self, line, **kwargs):
    """
    Like predict_next, but instead of directly sampling the next event's surface vector
    it samples its deepest projection and then finds nearest neighbours to that.

    Uses the sampler stored in the shell environment under "sampler" and
    prints the 10 highest scoring events it produces.

    its: maximum number of sampling iterations (default 100)

    """
    max_iterations = int(kwargs.pop("its", 100))
    entities, events, __ = ModelShell.parse_entities_and_events(line)

    try:
        sampler = self.env["sampler"]
    except KeyError:
        print("No predictor has been prepared. Use load_predictor to prepare one")
        return

    context = (entities[0], events)
    sampled = sampler.sample_next_input_events(context, max_iterations=max_iterations)
    # Highest scores first
    ranked = sorted(sampled, key=itemgetter(1), reverse=True)
    for event, score in ranked[:10]:
        print("%s %s" % (event, score))
def do_neighbours(self, line, **kwargs):
    """
    Print the vocabulary events with the highest summed PMI with the input chain.

    Every key of the model's event_counts is scored by summing the model's
    pmi between it and each context event. At most 10 are printed, and
    printing stops at the first one whose score is zero.

    """
    entities, events, remainder = ModelShell.parse_event_context(line)
    if remainder.strip():
        print("Ignoring remainder of input: %s" % remainder)

    context_preds = [predicate_relation(entities[0], event) for event in events]

    # Score all events in the vocabulary against the whole context
    scored = []
    for vocab_ev in self.model.event_counts.keys():
        total = sum(self.model.pmi(vocab_ev, context_ev) for context_ev in context_preds)
        scored.append((vocab_ev, total))
    # Ascending sort followed by a reversal, so that the order among tied scores is reversed too
    scored.sort(key=itemgetter(1))
    scored.reverse()

    for event, score in scored[:10]:
        if score == 0.:
            break
        print("%s %s" % (event, score))
def do_neighbours(self, line, **kwargs):
    """
    Print neighbours of a single input event, as found by the model's nearest
    neighbour finder.

    The event is projected using the model's project_events and the finder is
    queried with that projection. Each result is printed with its entity
    shown as "X", followed by its score.

    redis_port: port passed on when loading the nearest neighbour finder
        (default 6379)
    max: maximum number of results to print (default 10)

    """
    redis_port = int(kwargs.pop("redis_port", 6379))
    max_results = int(kwargs.pop("max", 10))
    # The finder has to exist already for this model: loading will fail if one's not been created
    finder = self.model.get_nearest_neighbour_finder(self.model_name, port=redis_port)

    # Only one event is read from the shell input
    entities, events, __ = ModelShell.parse_entities_and_events(line, max_events=1)

    # project_events works on a list of (entity, event) pairs: take the first row of the result
    projected = self.model.project_events([(entities[0], events[0])])
    projection = projected[0, :]

    # Search for neighbours to the projected event, taking no more than the requested number
    results = islice(finder.predict_next_event(projection), max_results)
    for (entity, event), source, result_vector, score in results:
        print("%s %s" % (event.to_string_entity_text({entity: "X"}), score))
def do_neighbours(self, line, **kwargs):
    """
    Print the 10 words most similar to the input chain's words.

    The context may contain argument-only items (plain tuples), which are
    added to the query as "arg:<word>". Words from the query itself are never
    shown in the output.

    pred: if given (with any value), words starting with "arg:" are left out
        of the output

    """
    max_results = 10
    entities, events, remainder = ModelShell.parse_event_context(line, allow_arg_only=True)
    if remainder.strip():
        print("Ignoring remainder of input: %s" % remainder)
    predicates_only = "pred" in kwargs

    # Plain tuples in the context are argument-only items; everything else is an event
    arg_words = ["arg:%s" % item[1] for item in events if type(item) is tuple]
    chain_events = [item for item in events if type(item) is not tuple]

    # The word generator takes a list of chains: we supply exactly one
    # and so only look at the first word list it produces
    generated = list(self.model.word_generator([(entities[0], chain_events)]))
    query_words = generated[0] + arg_words

    # Pick the similarity query according to the model's options
    if self.model.model_options["cosmul"]:
        similarity_query = self.model.word2vec.most_similar_cosmul
    else:
        similarity_query = self.model.word2vec.most_similar
    # With topn=None the result is used as an array of scores indexed by vocabulary position
    dists = similarity_query(positive=query_words, topn=None)

    shown = 0
    # Go through the vocabulary from the highest score downwards
    for index in numpy.argsort(dists)[::-1]:
        word = self.model.word2vec.index2word[index]
        # Ignore (don't return) words from the input, and arguments if they weren't asked for
        if word in query_words or (predicates_only and word.startswith("arg:")):
            continue
        print("%s (%g)" % (word, dists[index]))
        shown += 1
        if shown >= max_results:
            break
def do_neighbours(self, line, **kwargs):
    """
    Print the 10 entries of the model's dictionary whose vectors have the
    highest cosine similarity to the projection of the input chain.

    The chain is projected using the model's project_chains and compared
    against every row of the model's vectors. Rows for which the cosine
    cannot be computed (e.g. zero vectors) are masked and ranked as if their
    score were 0.

    The model's vectors are only read, never modified.

    """
    entities, events, remainder = ModelShell.parse_event_context(line)
    if remainder.strip():
        print("Ignoring remainder of input: %s" % remainder)

    # Get the vector projection of the events
    projection = self.model.project_chains([(entities[0], events)])[0]
    # Normalize into a new array rather than in place, so the model's output isn't touched
    projection = projection / numpy.sqrt((projection ** 2.).sum())

    # Vectors may not be normalized
    vectors = self.model.vectors
    vector_norms = numpy.sqrt((vectors ** 2.).sum(axis=1))
    # Divide the dot products by the row norms instead of normalizing the rows
    # themselves: the cosine is the same, but the model's own matrix is left
    # untouched (an in-place division would change the model for every later
    # command) and no normalized copy of the whole matrix is needed.
    # Zero-norm rows give 0/0 here: silence the warning, the NaN gets masked below
    with numpy.errstate(divide="ignore", invalid="ignore"):
        scores = numpy.dot(vectors, projection) / vector_norms
    scores = numpy.ma.masked_invalid(scores)

    # Masked scores are treated as 0 for the purposes of ranking
    neighbours = list(reversed(scores.argsort(fill_value=0.)))
    for neighbour_id in neighbours[:10]:
        print("%s %s" % (self.model.dictionary[neighbour_id], scores[neighbour_id]))
def do_predict(self, line, **kwargs):
    """
    Print the first 10 next-event predictions for the input chain, with
    their scores, in the order the model's predict_next_event produces them.

    """
    entities, events, _remainder = ModelShell.parse_event_context(line)
    context_entity = entities[0]

    # The model gives us ordered predictions: we only consume as many as we display
    ranked_predictions = self.model.predict_next_event(context_entity, events)
    for event, score in islice(ranked_predictions, 10):
        print("%s %s" % (event, score))
def do_test(self, line, **kwargs):
    """
    Run the projection model's test on the predicate of a single input event.

    The predicate's index is looked up in the model's pred_vocab and passed
    to projection_model.test as the only predicate.

    """
    entities, events, _remainder = ModelShell.parse_entities_and_event(line)
    predicate = self.model.get_model_predicate_repr(entities[0], events[0])
    print("Testing reconstruction for predicate %s" % predicate)

    predicate_index = self.model.pred_vocab[predicate].index
    # NOTE(review): the three [-1] lists presumably stand for unfilled argument slots -- confirm
    # against projection_model.test
    empty_slot = -1
    self.model.projection_model.test(
        [predicate_index], [empty_slot], [empty_slot], [empty_slot]
    )
def do_project_chain(self, line, **kwargs):
    """
    Print the model's projection of the input events, treated as a single chain.

    """
    entities, events, _remainder = ModelShell.parse_event_context(line)
    # project_chains takes a list of chains: we supply one and display the one result
    single_chain = (entities[0], events)
    chain_vectors = self.model.project_chains([single_chain])
    print(chain_vectors[0])
def do_project(self, line, **kwargs):
    """
    Print the model's projection of each input event, one event per line.

    Each event is paired with the first parsed entity before being projected.

    """
    entities, events, _remainder = ModelShell.parse_event_context(line)
    context_entity = entities[0]

    # Project all of the events in one go, then line the results up with the events again
    entity_event_pairs = [(context_entity, event) for event in events]
    vectors = self.model.project_events(entity_event_pairs)
    for event, vector in zip(events, vectors):
        print("%s: %s" % (event, vector))