def generate_story(title, keywords, K, extendable):
    """Generate a keyword storyline and then a story conditioned on it.

    The keyword stage calls ``constrained_generate`` with the module-level
    keyword model settings; its result is used, unchanged, as the prefix for
    the story stage, which calls ``generate`` with the module-level story
    model settings.

    Args:
        title: Forwarded to ``constrained_generate``.
        keywords: Forwarded to ``constrained_generate``.
        K: Forwarded to ``constrained_generate``.
        extendable: Forwarded to ``constrained_generate``.

    Returns:
        A ``(modified_keywords, tokens)`` tuple: the output of the keyword
        stage and the output of the story stage.
    """
    storyline = constrained_generate(
        kw_model, kw_vocab, keywords, title, kw_eos_id, max_kw_len,
        kw_dedup, cuda, kw_temperature, K, extendable)

    # The keyword-stage output is the story model's conditioning prefix.
    story_tokens = generate(
        st_model, st_vocab, storyline, st_eos_id, max_st_len,
        st_dedup, cuda, st_temperature)

    return storyline, story_tokens
Ejemplo n.º 2
0
    def generate_storyline(self,
                           topic,
                           kw_temp=None,
                           dedup=None,
                           max_len=None,
                           use_gold_titles=None,
                           oov_handling=None):
        """Return a formatted storyline for ``topic``.

        The topic is run through ``preprocess`` and ``self.apply_oov`` (using
        the keyword vocabulary). If ``use_gold_titles`` is truthy and the
        space-joined topic is a key of ``self.title2storyline``, the first
        stored storyline for that title is used; otherwise a storyline is
        produced by ``generate`` with ``self.kw_model``. The result is
        detokenized and passed through ``self.format_response2``.

        Args:
            topic: Raw topic; passed to ``preprocess``.
            kw_temp: Forwarded to ``generate`` as-is (may be ``None``).
            dedup: Forwarded to ``generate``; ``True`` when ``None``.
            max_len: Forwarded to ``generate``; 25 when ``None`` or 0.
            use_gold_titles: Whether to look the title up in
                ``self.title2storyline`` first; ``True`` when ``None``.
            oov_handling: Forwarded to ``self.apply_oov``.

        Returns:
            Whatever ``self.format_response2`` returns for the storyline.
        """
        def shown(value):
            # Log-friendly rendering used in the status line below.
            return "NONE" if value is None else str(value)

        # Resolve defaults before logging, so the log shows effective values.
        dedup = True if dedup is None else dedup
        max_len = 25 if (max_len is None or max_len == 0) else max_len
        use_gold_titles = True if use_gold_titles is None else use_gold_titles

        cleaned = preprocess(topic, self.special_chars, self.nlp)
        topic = self.apply_oov(cleaned, self.kw_dict.word2idx, oov_handling)
        topic_str = ' '.join(topic)

        print(
            "%s: Generating a storyline for  \"%s\" (kw_temp=%s, dedup=%s, max_len=%s, use_gold_titles=%s)"
            % (self.system_id, topic, shown(kw_temp), shown(dedup),
               shown(max_len), shown(use_gold_titles)))

        # The title fed to the model ends with the EOT id.
        title = to_ints(topic, self.kw_dict) + [self.kw_eot_id]

        gold_available = (use_gold_titles
                          and topic_str in self.title2storyline.keys())
        if not gold_available:
            print("%s: Generating storyline from model:" % (self.system_id))
            # Storyline ends with EOL
            sampled = generate(self.kw_model, self.kw_dict, title,
                               self.kw_end_id, self.kw_sep_id, max_len,
                               dedup, self.use_cuda, kw_temp)
            storyline = ' '.join(sampled)
        else:
            print("%s: Using gold title storyline:" % (self.system_id))
            # A title may map to several storylines; the first one is used.
            storyline = self.title2storyline[topic_str][0]

        # Detokenize before logging and formatting.
        storyline = self.detokenizer(storyline.split())
        print("%s: storyline: %s" % (self.system_id, storyline))
        return self.format_response2(storyline)
    def generate_response(self,
                          topic,
                          kw_temp=None,
                          story_temp=None,
                          dedup=None,
                          max_len=None,
                          use_gold_titles=None,
                          oov_handling=None):
        """Generate a story for ``topic`` with the story model and format it.

        The topic is run through ``preprocess`` and ``self.apply_oov`` (using
        the story vocabulary), converted to ids, terminated with
        ``self.st_eot_id`` and handed to ``generate`` with ``self.st_model``.
        The generated pieces are space-joined and passed through
        ``self.format_response``.

        Args:
            topic: Raw topic; passed to ``preprocess``.
            kw_temp: Not used by this method.
            story_temp: Ignored; see the note on the first statement.
            dedup: Forwarded to ``generate``; ``False`` when ``None``.
            max_len: Forwarded to ``generate``; 250 when ``None`` or 0.
            use_gold_titles: Not used by this method.
            oov_handling: Forwarded to ``self.apply_oov``.

        Returns:
            Whatever ``self.format_response`` returns for the story text.
        """
        # NOTE(review): the caller-supplied story_temp is overwritten
        # unconditionally. An earlier default of 0.3 with a guard against 0.0
        # was commented out in favour of this fixed value; confirm whether
        # ignoring the argument is intended.
        story_temp = 0.22

        def shown(value):
            # Log-friendly rendering used in the status line below.
            return "NONE" if value is None else str(value)

        # Logged before defaults are applied, so dedup/max_len may show NONE.
        print(
            "%s: Processing %s (story_temp=%s, dedup=%s, max_len=%s, sampling)"
            % (self.system_id, topic, shown(story_temp), shown(dedup),
               shown(max_len)))

        dedup = False if dedup is None else dedup
        max_len = 250 if (max_len is None or max_len == 0) else max_len

        cleaned = preprocess(topic, self.special_chars, self.nlp)
        topic = self.apply_oov(cleaned, self.st_dict.word2idx, oov_handling)

        # The conditioning prefix is the topic ids followed by the EOT id.
        story_prefix = to_ints(topic, self.st_dict)
        story_prefix.append(self.st_eot_id)

        generated = generate(self.st_model, self.st_dict, story_prefix,
                             self.st_eos_id, self.st_sep_id, max_len,
                             dedup, self.use_cuda, story_temp)

        return self.format_response(' '.join(generated))
Ejemplo n.º 4
0
    def generate_response(self,
                          topic,
                          kw_temp=None,
                          story_temp=None,
                          dedup=None,
                          max_len=None,
                          use_gold_titles=None,
                          oov_handling=None):
        """Produce a storyline and a story for ``topic`` as an HTML snippet.

        A storyline is taken from ``self.title2storyline`` (when
        ``use_gold_titles`` is truthy and the space-joined topic is a key) or
        produced by ``generate`` with ``self.kw_model``. The topic,
        ``self.title_end`` and the storyline are joined into the story prefix,
        which ``self.decoder.decode`` continues under ``torch.no_grad()``.

        Args:
            topic: Raw topic; passed to ``preprocess``.
            kw_temp: Forwarded to ``generate`` as-is (may be ``None``).
            story_temp: Forwarded to ``self.decoder.decode`` as
                ``temperature`` (may be ``None``).
            dedup: Forwarded to ``generate``; ``True`` when ``None``.
            max_len: Forwarded to ``generate`` for the storyline only;
                25 when ``None`` or 0.
            use_gold_titles: Whether to look the title up in
                ``self.title2storyline`` first; ``True`` when ``None``.
            oov_handling: Forwarded to ``self.apply_oov``.

        Returns:
            A string with the formatted storyline and story under
            ``<h2>Storyline</h2>`` and ``<h2>Story</h2>`` headings.
        """
        def shown(value):
            # Log-friendly rendering used in the status line below.
            return "NONE" if value is None else str(value)

        # Logged before defaults are applied, so unset arguments show NONE.
        print(
            "%s: Processing %s (kw_temp=%s, story_temp=%s, dedup=%s, max_len=%s, use_gold_titles=%s)"
            % (self.system_id, topic, shown(kw_temp), shown(story_temp),
               shown(dedup), shown(max_len), shown(use_gold_titles)))

        dedup = True if dedup is None else dedup
        max_len = 25 if (max_len is None or max_len == 0) else max_len
        use_gold_titles = True if use_gold_titles is None else use_gold_titles

        cleaned = preprocess(topic, self.special_chars, self.nlp)
        topic = self.apply_oov(cleaned, self.kw_dict.word2idx, oov_handling)
        topic_str = ' '.join(topic)
        # The title fed to the keyword model ends with the EOT id.
        title = to_ints(topic, self.kw_dict) + [self.kw_eot_id]

        gold_available = (use_gold_titles
                          and topic_str in self.title2storyline.keys())
        if not gold_available:
            print("%s: Generating storyline from model:" % (self.system_id))
            # Storyline ends with EOL
            sampled = generate(self.kw_model, self.kw_dict, title,
                               self.kw_end_id, self.kw_sep_id, max_len,
                               dedup, self.use_cuda, kw_temp)
            storyline = ' '.join(sampled)
        else:
            print("%s: Using gold title storyline:" % (self.system_id))
            # A title may map to several storylines; the first one is used.
            storyline = self.title2storyline[topic_str][0]

        topic_and_storyline = " ".join(
            [topic_str, self.title_end, storyline])
        print("%s: %s" % (self.system_id, topic_and_storyline))
        story_prefix = to_ints(topic_and_storyline.split(), self.st_dict)

        with torch.no_grad():
            tokens = self.decoder.decode(story_prefix, temperature=story_temp)

        prefix_len = len(story_prefix)

        def word_at(position):
            # Map the decoded id at ``position`` back to its word.
            return self.st_dict.idx2word[tokens[position]]

        # Positions after the prefix are the newly decoded continuation.
        story_words = [word_at(pos) for pos in range(prefix_len, len(tokens))]
        if self.print_cond_data:
            # Optionally show the conditioning prefix in front of the story.
            story_words = [word_at(pos)
                           for pos in range(prefix_len)] + story_words
        story = ' '.join(story_words)

        # detokenize
        story = self.detokenizer(story.split())
        storyline = self.detokenizer(storyline.split())

        formatted_storyline = self.format_response(storyline)
        formatted_story = self.format_response(story)

        return "<h2>Storyline</h2>%s<h2>Story</h2>%s" % (formatted_storyline,
                                                         formatted_story)
Ejemplo n.º 5
0
    def collab_storyline(self,
                         topic,
                         current_storyline,
                         kw_temp=None,
                         dedup=None,
                         max_len=None,
                         oov_handling=None):
        """Propose the next storyline phrase for a topic and existing phrases.

        The model prefix is the topic ids, ``self.kw_eot_id``, and (if any)
        the ids of the existing phrases, each followed by ``self.kw_sep``.
        ``generate`` is called with ``only_one=True`` and with the ids already
        present in the storyline passed as ``forbidden``.

        Args:
            topic: Raw topic; passed to ``preprocess``.
            current_storyline: Iterable of phrase strings written so far;
                each is stripped of surrounding whitespace.
            kw_temp: Forwarded to ``generate`` as-is (may be ``None``).
            dedup: Forwarded to ``generate``; ``True`` when ``None``.
            max_len: Forwarded to ``generate``; 25 when ``None`` or 0.
            oov_handling: Forwarded to ``self.apply_oov``.

        Returns:
            A dict with ``"new_phrase"`` (the generated words, minus the final
            token, joined by spaces) and ``"end_flag"`` (``True`` when the
            final token equals ``self.kw_end``, or when nothing was
            generated).
        """
        if dedup is None:
            dedup = True

        if max_len is None or max_len == 0:
            max_len = 25

        topic = preprocess(topic, self.special_chars, self.nlp)
        topic = self.apply_oov(topic, self.kw_dict.word2idx, oov_handling)

        current_storyline = [phrase.strip() for phrase in current_storyline]

        if current_storyline:
            # Every existing phrase, including the last, is followed by the
            # separator so the model continues with a new phrase.
            current_storyline_with_sep = (" " + self.kw_sep + " ").join(
                current_storyline) + " " + self.kw_sep
            # Storyline formatting for models
            print("Before", current_storyline_with_sep)
            current_storyline_with_sep = preprocess(current_storyline_with_sep,
                                                    self.special_chars,
                                                    self.nlp)
            current_storyline_with_sep = self.apply_oov(
                current_storyline_with_sep, self.kw_dict.word2idx,
                oov_handling)
            print("After", current_storyline_with_sep)
        else:
            current_storyline_with_sep = ""

        #TODO fix this print statement with the awkward joins
        print(
            "%s: Collaborating on %s %s %s %s (kw_temp=%s, dedup=%s)" %
            (self.system_id, topic, self.title_end, current_storyline_with_sep,
             self.kw_sep if current_storyline else " ",
             "NONE" if kw_temp is None else str(kw_temp),
             "NONE" if dedup is None else str(dedup)))

        used_word_ints = set()
        current_tokens = to_ints(topic, self.kw_dict)
        current_tokens.append(self.kw_eot_id)  # title ends with EOT
        print("Current storyline %s" % current_storyline)
        if current_storyline_with_sep:
            phrase_tokens = to_ints(current_storyline_with_sep, self.kw_dict)
            # Ids already in the storyline are passed to generate() as
            # forbidden.
            used_word_ints.update(phrase_tokens)
            current_tokens.extend(phrase_tokens)

        new_words = generate(self.kw_model,
                             self.kw_dict,
                             current_tokens,
                             self.kw_end_id,
                             self.kw_sep_id,
                             max_len,
                             dedup,
                             self.use_cuda,
                             kw_temp,
                             only_one=True,
                             forbidden=used_word_ints)
        if not new_words:
            # This shouldn't happen, right?
            print("%s: Returning with nothing to add." % (self.system_id))
            return {"new_phrase": "", "end_flag": True}

        # removes the kw sep TODO would be better to not remove here and just mask in UI
        # NOTE(review): the final token is dropped even when it is not a
        # valid end token (only a warning is printed) - confirm intended.
        last_word = new_words.pop()
        valid_ends = {self.kw_end, self.kw_sep}
        if last_word not in valid_ends:
            print("%s is not a valid storyline end token. Valid tokens: [%s]" %
                  (last_word, valid_ends))
        end_flag = last_word == self.kw_end
        new_phrase = " ".join(new_words)
        print("%s: Returning %s (end_flag=%s last_word=%s)" %
              (self.system_id, new_phrase, end_flag, last_word))
        return {"new_phrase": new_phrase, "end_flag": end_flag}