Esempio n. 1
0
    def __init__(self, data_bundle, vocabulary=None):
        """Unpack a data bundle and encode its text with a vocabulary.

        Args:
            data_bundle: Seven-item sequence unpacked, in order, into the
                contexts, questions, choices, labels, choices map, context
                lengths and question lengths.
            vocabulary: Optional vocabulary object. When it is falsy, a new
                one is obtained from ``rn.get_vocab`` using the questions and
                contexts with ``min_frequency=10``.
        """
        (raw_contexts, raw_questions, self.choices, self.labels,
         self.choices_map, self.context_lens, self.qs_lens) = data_bundle

        # Reuse the supplied vocabulary if there is one, else build it here.
        self.vocab = vocabulary or rn.get_vocab(
            raw_questions, raw_contexts, min_frequency=10)
        self.vocab_size = len(self.vocab.vocabulary_)

        # Every distinct choice seen across all examples, in sorted order.
        distinct_choices = {
            option for options in self.choices for option in options
        }
        self.labels_idx = sorted(distinct_choices)

        # Show the first ten raw contexts, questions and labels.
        print(raw_contexts[:10])
        print(raw_questions[:10])
        print(self.labels[:10])

        # Keep only the first element of each transformed choice.
        self.transformed_labels_idx = [
            encoded[0] for encoded in self.vocab.transform(self.labels_idx)
        ]
        print(self.transformed_labels_idx)

        self.contexts = rn.vocab_transform(raw_contexts, self.vocab)
        self.questions = rn.vocab_transform(raw_questions, self.vocab)
Esempio n. 2
0
    def __init__(self, data_bundle, vocabulary=None):
        """Unpack a data bundle, encode its text and locate the placeholder.

        Args:
            data_bundle: Seven-item sequence unpacked, in order, into the
                contexts, questions, choices, labels, choices map, context
                lengths and question lengths.
            vocabulary: Optional vocabulary object. When it is falsy, a new
                one is obtained from ``rn.get_vocab`` using the questions and
                contexts with ``min_frequency=10``.

        Raises:
            ValueError: If an encoded question does not contain the encoded
                ``@placeholder`` value (raised by ``list.index``).
        """
        (raw_contexts, raw_questions, self.choices, self.labels,
         self.choices_map, self.context_lens, self.qs_lens) = data_bundle

        self.vocab = vocabulary if vocabulary else rn.get_vocab(
            raw_questions, raw_contexts, min_frequency=10)
        self.vocab_size = len(self.vocab.vocabulary_)

        # Every distinct choice seen across all examples, in sorted order.
        distinct_choices = {
            option for options in self.choices for option in options
        }
        self.labels_idx = sorted(distinct_choices)

        # Keep only the first element of each transformed choice.
        self.transformed_labels_idx = [
            encoded[0] for encoded in self.vocab.transform(self.labels_idx)
        ]
        print(self.transformed_labels_idx)

        # Show raw questions that have no '@placeholder' token.
        print([
            question for question in raw_questions
            if '@placeholder' not in question.split(" ")
        ])

        self.contexts = rn.vocab_transform(raw_contexts, self.vocab)
        self.questions = rn.vocab_transform(raw_questions, self.vocab)

        # Encode the placeholder token on its own to learn its value.
        encoded_placeholder = rn.vocab_transform(['@placeholder'], self.vocab)
        placeholder_idx = encoded_placeholder[0][0]
        print(placeholder_idx)

        # Count the encoded questions that contain the placeholder value.
        with_placeholder = [
            question for question in self.questions
            if placeholder_idx in question
        ]
        print(len(with_placeholder))

        # Position of the first placeholder occurrence in each question.
        positions = []
        for question in self.questions:
            positions.append(list(question).index(placeholder_idx))
        self.placeholder_inds = np.array(positions).astype(int)
        print(self.placeholder_inds.shape)
Esempio n. 3
0
    def __init__(self, config, data_path=None, vocabulary=None, name=None):
        """Load a dataset and encode its text fields with a vocabulary.

        Args:
            config: Object providing ``batch_size`` and ``num_steps``.
            data_path: Passed unchanged to ``read.load_data``.
            vocabulary: Optional vocabulary object. When it is falsy, a new
                one is obtained from ``read.get_vocab`` using the loaded
                questions and contexts with ``min_frequency=500``.
            name: Accepted for interface compatibility; not used in the
                code shown here.
        """
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        (raw_context, raw_questions, raw_choices, raw_labels,
         self.choices_map) = read.load_data(data_path)
        all_choices = read.build_choices(raw_choices)
        self.epoch_size = ((len(raw_context) // batch_size) - 1) // num_steps

        # Build a vocabulary from this data unless the caller supplied one.
        if not vocabulary:
            self.vocabulary = read.get_vocab(raw_questions,
                                             raw_context,
                                             min_frequency=500)
        else:
            self.vocabulary = vocabulary

        # Join each example's choices into a single string exactly once.
        # Joining the already-joined strings a second time would iterate
        # them character by character and put a space between every
        # character before the vocabulary transform.
        joined_choices = [" ".join(x) for x in raw_choices]

        self.all_choices = read.vocab_transform(all_choices, self.vocabulary)
        self.questions = read.vocab_transform(raw_questions, self.vocabulary)
        self.context = read.vocab_transform(raw_context, self.vocabulary)
        self.labels = read.vocab_transform(raw_labels, self.vocabulary)
        self.choices = read.vocab_transform(joined_choices, self.vocabulary)
Esempio n. 4
0
    def __init__(self, data_bundle, vocabulary=None):
        """Unpack a data bundle, then encode and pad its text.

        Args:
            data_bundle: Seven-item sequence unpacked, in order, into the
                contexts, questions, choices, labels, choices map, context
                lengths and question lengths.
            vocabulary: Optional vocabulary object. When it is falsy, a new
                one is obtained from ``rn.get_vocab`` using the questions and
                contexts with ``min_frequency=FLAGS.min_freq``.
        """
        (raw_contexts, raw_questions, self.choices, self.labels,
         self.choices_map, self.context_lens, self.qs_lens) = data_bundle

        # Start from the supplied vocabulary; build one only if it is falsy.
        self.vocab = vocabulary
        if not self.vocab:
            self.vocab = rn.get_vocab(raw_questions,
                                      raw_contexts,
                                      min_frequency=FLAGS.min_freq)
        self.vocab_size = len(self.vocab.vocabulary_)

        # Every distinct choice seen across all examples, in sorted order.
        distinct_choices = set()
        for options in self.choices:
            distinct_choices.update(options)
        self.labels_idx = sorted(distinct_choices)

        # Encode with the vocabulary, then pass through rn.pad_eval with the
        # step count configured in FLAGS for that field.
        self.contexts = rn.pad_eval(
            rn.vocab_transform(raw_contexts, self.vocab),
            FLAGS.context_steps)
        self.questions = rn.pad_eval(
            rn.vocab_transform(raw_questions, self.vocab),
            FLAGS.question_steps)
Esempio n. 5
0
from reader import load_data
from reader import get_vocab
from reader import vocab_transform
from reader import batch_iter


# 1. Load the data stored under "wdw/test".
(contexts, questions, choices, labels,
 choices_map, context_lens, qs_lens) = load_data(data_path="wdw/test")

# 2. Obtain a vocabulary from the loaded contexts and questions.
vocab = get_vocab(contexts, questions)

# 3. Encode contexts and questions with that vocabulary.
contexts, questions = (vocab_transform(contexts, vocab),
                       vocab_transform(questions, vocab))

# 4. Pass the encoded text and the remaining fields to batch_iter.
readers = batch_iter(
    contexts, questions, choices, labels, choices_map, context_lens, qs_lens)

# Example of consuming the result (left disabled):
# for q, c, ch, lab, ch_map, c_lens, q_lens in readers:
#     print(c.shape)
#     break