def generate_story(title, keywords, K, extendable):
    """Generate a story in two stages: adapt the keyword storyline, then expand it.

    First runs the keyword model to produce a (possibly extended / constrained)
    storyline from the given keywords and title, then conditions the story
    model on that storyline to produce the story tokens.

    Returns a tuple ``(modified_keywords, story_tokens)``.
    """
    # Stage 1: constrain/extend the keyword storyline with the keyword model.
    modified_keywords = constrained_generate(
        kw_model, kw_vocab, keywords, title, kw_eos_id, max_kw_len,
        kw_dedup, cuda, kw_temperature, K, extendable)
    # Stage 2: the storyline itself is the prefix for story generation.
    story_tokens = generate(
        st_model, st_vocab, modified_keywords, st_eos_id, max_st_len,
        st_dedup, cuda, st_temperature)
    return modified_keywords, story_tokens
def generate_storyline(self, topic, kw_temp=None, dedup=None, max_len=None,
                       use_gold_titles=None, oov_handling=None):
    """Produce a storyline for *topic*.

    Prefers a gold (human-authored) storyline when one exists for the
    preprocessed topic string and ``use_gold_titles`` is enabled; otherwise
    generates a storyline from the keyword model. Returns the formatted
    response (via ``format_response2``).
    """
    # Resolve unset options to their defaults.
    dedup = True if dedup is None else dedup
    if max_len is None or max_len == 0:
        max_len = 25
    use_gold_titles = True if use_gold_titles is None else use_gold_titles

    topic = preprocess(topic, self.special_chars, self.nlp)
    topic = self.apply_oov(topic, self.kw_dict.word2idx, oov_handling)
    topic_str = ' '.join(topic)

    def _show(value):
        # Display helper for optional parameters in the log line.
        return "NONE" if value is None else str(value)

    print(
        "%s: Generating a storyline for \"%s\" (kw_temp=%s, dedup=%s, max_len=%s, use_gold_titles=%s)"
        % (self.system_id, topic, _show(kw_temp), _show(dedup),
           _show(max_len), _show(use_gold_titles)))

    title = to_ints(topic, self.kw_dict) + [self.kw_eot_id]  # title ends with EOT

    if use_gold_titles and topic_str in self.title2storyline:
        print("%s: Using gold title storyline:" % (self.system_id))
        # Title->storyline mappings are not guaranteed to be unique.
        # Currently picks the first; could be coded differently.
        storyline = self.title2storyline[topic_str][0]
    else:
        print("%s: Generating storyline from model:" % (self.system_id))
        # Storyline ends with EOL.
        generated = generate(self.kw_model, self.kw_dict, title,
                             self.kw_end_id, self.kw_sep_id, max_len,
                             dedup, self.use_cuda, kw_temp)
        storyline = ' '.join(generated)

    # Detokenize before formatting the response.
    storyline = self.detokenizer(storyline.split())
    print("%s: storyline: %s" % (self.system_id, storyline))
    return self.format_response2(storyline)
def generate_response(self, topic, kw_temp=None, story_temp=None, dedup=None,
                      max_len=None, use_gold_titles=None, oov_handling=None):
    """Generate a story for *topic* directly with the story model (sampling).

    ``kw_temp`` and ``use_gold_titles`` are accepted for interface
    compatibility but unused here — this variant skips storyline generation.
    Returns the formatted story (via ``format_response``).
    """
    # BUG FIX: story_temp was unconditionally overwritten with 0.22, silently
    # ignoring any caller-supplied value. Default only when unset, and guard
    # an explicit 0.0 to avoid division by zero during sampling (this mirrors
    # the previously commented-out intent in the original code).
    if story_temp is None:
        story_temp = 0.22  # default sampling temperature for this model
    elif story_temp == 0.0:
        story_temp = 0.001  # avoid division by zero

    print(
        "%s: Processing %s (story_temp=%s, dedup=%s, max_len=%s, sampling)"
        % (self.system_id, topic,
           "NONE" if story_temp is None else str(story_temp),
           "NONE" if dedup is None else str(dedup),
           "NONE" if max_len is None else str(max_len)))

    if dedup is None:
        dedup = False
    if max_len is None or max_len == 0:
        max_len = 250

    topic = preprocess(topic, self.special_chars, self.nlp)
    topic = self.apply_oov(topic, self.st_dict.word2idx, oov_handling)

    # Story prefix is the topic tokens terminated by EOT.
    story_prefix = to_ints(topic, self.st_dict)
    story_prefix.append(self.st_eot_id)

    story_phrases = generate(self.st_model, self.st_dict, story_prefix,
                             self.st_eos_id, self.st_sep_id, max_len,
                             dedup, self.use_cuda, story_temp)
    story = ' '.join(story_phrases)
    return self.format_response(story)
def generate_response(self, topic, kw_temp=None, story_temp=None, dedup=None,
                      max_len=None, use_gold_titles=None, oov_handling=None):
    """Generate a storyline and then a full story for *topic*.

    First obtains a storyline (gold if available and enabled, otherwise from
    the keyword model), then decodes a story conditioned on
    ``topic + title_end + storyline``. Returns an HTML string containing both
    the formatted storyline and the formatted story.
    """
    print(
        "%s: Processing %s (kw_temp=%s, story_temp=%s, dedup=%s, max_len=%s, use_gold_titles=%s)"
        % (self.system_id, topic,
           "NONE" if kw_temp is None else str(kw_temp),
           "NONE" if story_temp is None else str(story_temp),
           "NONE" if dedup is None else str(dedup),
           "NONE" if max_len is None else str(max_len),
           "NONE" if use_gold_titles is None else str(use_gold_titles)))

    # Resolve unset options to their defaults.
    if dedup is None:
        dedup = True
    if max_len is None or max_len == 0:
        max_len = 25
    if use_gold_titles is None:
        use_gold_titles = True

    topic = preprocess(topic, self.special_chars, self.nlp)
    topic = self.apply_oov(topic, self.kw_dict.word2idx, oov_handling)
    topic_str = ' '.join(topic)

    title = to_ints(topic, self.kw_dict) + [self.kw_eot_id]  # title ends with EOT

    # Idiom fix: membership test directly on the dict, not on .keys().
    if use_gold_titles and topic_str in self.title2storyline:
        print("%s: Using gold title storyline:" % (self.system_id))
        # Title->storyline mappings are not guaranteed to be unique.
        # Currently picks the first; could be coded differently.
        storyline = self.title2storyline[topic_str][0]
    else:
        print("%s: Generating storyline from model:" % (self.system_id))
        # Storyline ends with EOL.
        all_tokens = generate(self.kw_model, self.kw_dict, title,
                              self.kw_end_id, self.kw_sep_id, max_len,
                              dedup, self.use_cuda, kw_temp)
        storyline = ' '.join(all_tokens)

    topic_and_storyline = topic_str + " " + self.title_end + " " + storyline
    print("%s: %s" % (self.system_id, topic_and_storyline))

    story_prefix = to_ints(topic_and_storyline.split(), self.st_dict)
    with torch.no_grad():  # inference only; no gradients needed
        tokens = self.decoder.decode(story_prefix, temperature=story_temp)

    # Idiom fix: slice the decoded sequence instead of index-range loops.
    # The continuation is everything after the conditioning prefix.
    cont_tokens = [self.st_dict.idx2word[t] for t in tokens[len(story_prefix):]]
    if self.print_cond_data:
        # Optionally prepend the conditioning tokens for debugging/display.
        init_tokens = [self.st_dict.idx2word[t] for t in tokens[:len(story_prefix)]]
        cont_tokens = init_tokens + cont_tokens
    story = ' '.join(cont_tokens)

    # Detokenize both pieces before formatting.
    story = self.detokenizer(story.split())
    storyline = self.detokenizer(storyline.split())
    formatted_storyline = self.format_response(storyline)
    formatted_story = self.format_response(story)
    return "<h2>Storyline</h2>%s<h2>Story</h2>%s" % (formatted_storyline, formatted_story)
def collab_storyline(self, topic, current_storyline, kw_temp=None, dedup=None,
                     max_len=None, oov_handling=None):
    """Suggest the next storyline phrase given the title and phrases so far.

    Returns a dict ``{"new_phrase": str, "end_flag": bool}`` where
    ``end_flag`` is True when the model emitted the storyline-end token.
    """
    # Resolve unset options to their defaults.
    if dedup is None:
        dedup = True
    if max_len is None or max_len == 0:
        max_len = 25

    topic = preprocess(topic, self.special_chars, self.nlp)
    topic = self.apply_oov(topic, self.kw_dict.word2idx, oov_handling)
    topic_str = " ".join(topic)

    current_storyline = [phrase.strip() for phrase in current_storyline]
    if len(current_storyline) > 0:
        # Storyline formatting for the models: phrases joined by the keyword
        # separator, with a trailing separator.
        current_storyline_with_sep = ((" " + self.kw_sep + " ").join(
            current_storyline) + " " + self.kw_sep)
        print("Before", current_storyline_with_sep)
        current_storyline_with_sep = preprocess(current_storyline_with_sep,
                                                self.special_chars, self.nlp)
        current_storyline_with_sep = self.apply_oov(
            current_storyline_with_sep, self.kw_dict.word2idx, oov_handling)
        print("After", current_storyline_with_sep)
    else:
        current_storyline_with_sep = ""

    # TODO fix this print statement with the awkward joins
    print(
        "%s: Collaborating on %s %s %s %s (kw_temp=%s, dedup=%s)"
        % (self.system_id, topic, self.title_end, current_storyline_with_sep,
           self.kw_sep if len(current_storyline) > 0 else " ",
           "NONE" if kw_temp is None else str(kw_temp),
           # BUG FIX: was `dedup is dedup is None`, a chained-comparison typo
           # for `dedup is None` (it happened to evaluate equivalently, since
           # `x is x` is always True, but was clearly unintended).
           "NONE" if dedup is None else str(dedup)))

    used_word_ints = set()
    current_tokens = to_ints(topic, self.kw_dict)
    current_tokens.append(self.kw_eot_id)  # title ends with EOT
    print("Current storyline %s" % current_storyline)
    if current_storyline_with_sep:
        phrase_tokens = to_ints(current_storyline_with_sep, self.kw_dict)
        # Idiom fix: set.update accepts any iterable; no need to wrap in set().
        used_word_ints.update(phrase_tokens)
        current_tokens.extend(phrase_tokens)

    # Ask the model for exactly one more phrase, forbidding already-used words.
    new_words = generate(self.kw_model, self.kw_dict, current_tokens,
                         self.kw_end_id, self.kw_sep_id, max_len, dedup,
                         self.use_cuda, kw_temp, only_one=True,
                         forbidden=used_word_ints)
    if len(new_words) == 0:
        # This shouldn't happen, right?
        print("%s: Returning with nothing to add." % (self.system_id))
        return {"new_phrase": "", "end_flag": True}

    # Removes the kw sep. TODO: would be better to not remove here and just
    # mask it in the UI.
    last_word = new_words.pop()
    valid_ends = {self.kw_end, self.kw_sep}
    if last_word not in valid_ends:
        print("%s is not a valid storyline end token. Valid tokens: [%s]"
              % (last_word, valid_ends))
    end_flag = last_word == self.kw_end
    new_phrase = " ".join(new_words)
    print("%s: Returning %s (end_flag=%s last_word=%s)"
          % (self.system_id, new_phrase, end_flag, last_word))
    return {"new_phrase": new_phrase, "end_flag": end_flag}