Code example #1
def evaluate_input(searcher, word2idx, idx2word, device):
    """Interactive chat loop: read sentences from stdin and print model replies.

    Type ``q`` or ``quit`` to exit. The previous (already transformed) input
    sentence is kept and passed to ``evaluate`` as dialogue context for the
    next turn.

    Args:
        searcher: decoding/search module forwarded to ``evaluate``.
        word2idx: token -> id mapping used to encode the user input.
        idx2word: id -> token mapping used to decode the model output.
        device: torch device forwarded to ``evaluate``.
    """
    tokenizer = DialogSpacyTokenizer(lower=True, specials=HRED_SPECIAL_TOKENS)
    to_token_ids = ToTokenIds(word2idx, specials=HRED_SPECIAL_TOKENS)
    to_tensor = ToTensor()
    transforms = [tokenizer, to_token_ids, to_tensor]
    previous = None
    while True:
        try:
            # Get input sentence
            input_sentence1 = input('> ')
            if input_sentence1 in ('q', 'quit'):
                break

            # Normalize sentence
            input_sentence1 = normalizeString(input_sentence1)

            # Apply the tokenize -> ids -> tensor pipeline in order.
            for t in transforms:
                input_sentence1 = t(input_sentence1)

            output_words = evaluate(searcher, idx2word, previous,
                                    input_sentence1, device)
            previous = input_sentence1
            # Drop padding / end-of-sequence markers before showing the reply.
            # (A raw debug print of output_words was removed here.)
            output_words = [w for w in output_words if w not in ('EOS', 'PAD')]
            print('Bot:', ' '.join(output_words))

        except KeyError:
            # Presumably raised by ToTokenIds on out-of-vocabulary tokens.
            print("Error: Encountered unknown word.")
Code example #2
                        action='store_true',
                        default=False,
                        help='shared weights between encoder '
                        'and decoder')
    # Flag: share one embedding layer between encoder and decoder.
    parser.add_argument('-shared_emb',
                        action='store_true',
                        default=False,
                        help='shared embedding layer')

    options = parser.parse_args()
    # This script fine-tunes the whole model; the pretraining flag (-pt)
    # belongs to a different workflow, so refuse it outright.
    # NOTE(review): `assert` is stripped under -O; raising SystemExit would be
    # safer for CLI validation.
    if options.pretraining is True:
        assert False, "you are using this script to fine tune the whole " \
                      "model! -pt should not be activated!"
    # ---  read data to create vocabulary dict ---

    tokenizer = DialogSpacyTokenizer(lower=True, specials=HRED_SPECIAL_TOKENS)

    # Select the corpus named on the command line; data paths are hard-coded
    # relative to the current working directory.
    if options.dataset == "movie":
        dataset = MovieCorpusDatasetTriples('./data/', transforms=None)
    elif options.dataset == "dailydialog":
        dataset = DailyDialogDataset('./data/ijcnlp_dailydialog',
                                     transforms=None)
    elif options.dataset == "semaine":
        dataset = SemaineDatasetTriplesOnly(
            "./data/semaine-database_download_2020-01-21_11_41_49")
    else:
        assert False, "Specify dataset used in options (movie, dailydialog or" \
                      "semaine)"

    dataset.normalize_data()
    if options.preprocess:
Code example #3
    def map(self, t):
        """Register transform *t* on this dataset and return self for chaining.

        Lazily creates the transform list on first use, so a dataset built
        with ``transforms=None`` can still accumulate transforms fluently.
        """
        pipeline = self.transforms
        if pipeline is None:
            pipeline = []
            self.transforms = pipeline
        pipeline.append(t)
        return self

    def __len__(self):
        """Return the number of (s1, s2, s3) dialogue triples in the dataset."""
        triple_count = len(self.triples)
        return triple_count

    def __getitem__(self, idx):
        """Return the idx-th triple, with every registered transform applied.

        Each transform is applied to the three utterances in order
        (first, second, third) before moving on to the next transform.
        """
        first, second, third = self.triples[idx]

        if self.transforms is not None:
            for transform in self.transforms:
                first, second, third = (transform(first),
                                        transform(second),
                                        transform(third))
        return first, second, third


if __name__ == '__main__':
    # Smoke-test entry point: build the DailyDialog dataset and its vocabulary.
    dataset = DailyDialogDataset('./data/ijcnlp_dailydialog', transforms=None)

    tokenizer = DialogSpacyTokenizer(lower=True, specials=HRED_SPECIAL_TOKENS)

    # Optional preprocessing steps, disabled for this smoke test:
    #dataset.normalize_data()
    #dataset.threshold_data(12, tokenizer=tokenizer)
    #dataset.trim_words(3, tokenizer=tokenizer)
    vocab_dict = dataset.create_vocab_dict(tokenizer)
    # Report the result instead of dropping into a debugger: the original
    # `import ipdb; ipdb.set_trace()` was a leftover debugging hook (and an
    # undeclared third-party dependency).
    print('vocabulary size:', len(vocab_dict))