Exemple #1
0
def char2vec_one_hot_fast(pairs, character_positions_in_vocabulary):
    """Encode (word, punctuation marks...) pairs as a matrix of integer ids.

    Args:
        pairs: either a sequence of tuples ``(word, mark_1, ..., mark_k)`` or
            a single such pair. When ``pairs[0]`` is not a tuple, ``pairs``
            itself is treated as one pair and wrapped into a batch of one.
        character_positions_in_vocabulary: indexable object; element 0 is
            handed to ``char2id`` for words and element 1 for punctuation
            marks.

    Returns:
        ``np.int32`` array of shape
        ``(number_of_pairs, 1 + MAX_NUM_PUNCTUATION_MARKS)``. Column 0 holds
        the word id. Column ``j`` (``j >= 1``) holds the id of the j-th
        punctuation mark plus one, or 0 when the pair has no j-th mark.
        Marks beyond ``MAX_NUM_PUNCTUATION_MARKS`` are ignored.

    Side effects:
        Changes NumPy's global print options (untruncated output,
        ``linewidth=52``), as the original implementation did.
    """
    import sys  # local import: only needed for the print-options call below

    if not isinstance(pairs[0], tuple):
        pairs = [pairs]
    word_char_positions = character_positions_in_vocabulary[0]
    punctuation_char_positions = character_positions_in_vocabulary[1]

    # Zero-initialised, so columns of missing punctuation marks stay 0.
    encoded = np.zeros(shape=(len(pairs), 1 + MAX_NUM_PUNCTUATION_MARKS),
                       dtype=np.int32)
    for row, pair in enumerate(pairs):
        encoded[row, 0] = char2id(pair[0], word_char_positions)
        marks = pair[1:MAX_NUM_PUNCTUATION_MARKS + 1]
        for column, mark in enumerate(marks, start=1):
            # +1 keeps 0 free as the "no punctuation mark here" value.
            encoded[row, column] = char2id(mark,
                                           punctuation_char_positions) + 1

    # NumPy rejects a NaN threshold with ValueError; sys.maxsize is the
    # supported way to request untruncated array printing.
    np.set_printoptions(threshold=sys.maxsize, linewidth=52)
    return encoded
 def _next_batch(self):
     """Build one (inputs, labels) batch and move every cursor forward.

     For each batch row the character under that row's cursor is one-hot
     encoded, the speaker flag stored for that position is one-hot encoded
     and the bot-speaks flag for that position is copied as a float.

     Returns:
         Tuple ``(inputs, labels)`` of float32 arrays. ``inputs`` is the
         column-wise concatenation of the character one-hot, the speaker
         one-hot and the bot-speaks flag; ``labels`` is the character
         one-hot followed by the bot-speaks flag.

     Side effects:
         Each cursor is advanced by one, wrapping at ``self._text_size``,
         and ``self._counter`` is incremented.
     """
     batch_size = self._batch_size
     char_one_hot = np.zeros(shape=(batch_size, self._vocabulary_size),
                             dtype=np.float32)
     speaker_one_hot = np.zeros(shape=(batch_size, self._number_of_speakers),
                                dtype=np.float32)
     bot_flags = np.zeros(shape=(batch_size, 1), dtype=np.float32)

     for row in range(batch_size):
         position = self._cursor[row]
         symbol = self._text[position]
         symbol_id = char2id(symbol, self._character_positions_in_vocabulary)
         char_one_hot[row, symbol_id] = 1.0
         speaker_one_hot[row, self._speaker_flags[position]] = 1.0
         bot_flags[row, 0] = float(self._bot_speaks_flags[position])
         self._cursor[row] = (position + 1) % self._text_size

     inputs = np.concatenate((char_one_hot, speaker_one_hot, bot_flags), 1)
     labels = np.concatenate((char_one_hot, bot_flags), 1)
     self._counter += 1
     return inputs, labels
 def _start_batch(self):
     """Return a one-hot batch in which every row encodes the newline char.

     Returns:
         float64 array of shape ``(batch_size, vocabulary_size)`` whose only
         non-zero column is the id ``char2id`` assigns to ``'\\n'``.
     """
     # np.float was merely an alias of the builtin float (i.e. float64) and
     # was removed in NumPy 1.24, so the explicit dtype is used instead.
     batch = np.zeros(shape=(self._batch_size, self._vocabulary_size),
                      dtype=np.float64)
     # The newline id is the same for every row: look it up once.
     newline_id = char2id('\n', self.characters_positions_in_vocabulary)
     batch[:, newline_id] = 1.0
     return batch
 def _next_batch(self):
     """Return the ids of the characters under the cursors, then advance.

     Returns:
         Array with one row per batch entry, each row holding the single id
         ``char2id`` gives to the character at that entry's cursor.

     Side effects:
         Every cursor is moved one step forward, wrapping at
         ``self._text_size``.
     """
     rows = range(self._batch_size)
     # Read all ids first; cursors are only moved once the batch is built.
     id_rows = []
     for row in rows:
         symbol = self._text[self._cursor[row]]
         id_rows.append(
             [char2id(symbol, self.character_positions_in_vocabulary)])
     batch = np.array(id_rows)
     for row in rows:
         self._cursor[row] = (self._cursor[row] + 1) % self._text_size
     return batch
 def _next_batch(self):
     """Generate a single batch from the current cursor position in the data.

     Returns:
         float64 array of shape ``(batch_size, vocabulary_size)``; row ``b``
         is one-hot at the id of the character under cursor ``b``.

     Side effects:
         Every cursor is advanced by one, wrapping at ``self._text_size``.
     """
     # np.float was an alias of the builtin float (float64) and was removed
     # in NumPy 1.24; np.float64 yields the same dtype on every version.
     batch = np.zeros(shape=(self._batch_size, self._vocabulary_size),
                      dtype=np.float64)
     for b in range(self._batch_size):
         pos = self._cursor[b]
         char_id = char2id(self._text[pos],
                           self.characters_positions_in_vocabulary)
         batch[b, char_id] = 1.0
         self._cursor[b] = (pos + 1) % self._text_size
     return batch
Exemple #6
0
 def _next_batch(self):
     """Generate a single batch from the current cursor position in the data.

     Returns:
         float64 array of shape ``(batch_size, vocabulary_size)``; row ``b``
         is one-hot at the id ``char2id`` gives to the entry of
         ``self._pairs`` under cursor ``b``.

     Side effects:
         Every cursor is advanced by one, wrapping at
         ``self._number_of_pairs``.
     """
     # np.float was an alias of the builtin float (float64) and was removed
     # in NumPy 1.24; np.float64 yields the same dtype on every version.
     batch = np.zeros(shape=(self._batch_size, self._vocabulary_size),
                      dtype=np.float64)
     for b in range(self._batch_size):
         pos = self._cursor[b]
         token_id = char2id(self._pairs[pos],
                            self.character_positions_in_vocabulary)
         batch[b, token_id] = 1.0
         self._cursor[b] = (pos + 1) % self._number_of_pairs
     return batch
Exemple #7
0
 def _create_id_array(pairs, character_positions_in_vocabulary):
     """Convert every pair into a row of token ids shifted by one.

     Args:
         pairs: sequence of token sequences.
         character_positions_in_vocabulary: sequence of lookup objects; the
             i-th one is used with ``char2id`` for the i-th token of a pair.
             Tokens without a matching lookup object (and vice versa) are
             skipped, because the two are walked in lockstep.

     Returns:
         int16 array of shape ``(len(pairs), MAX_NUM_PUNCTUATION_MARKS)``.
         Written cells hold ``char2id(...) + 1``; untouched cells stay 0.
     """
     id_table = np.zeros(shape=(len(pairs), MAX_NUM_PUNCTUATION_MARKS),
                         dtype=np.int16)
     for row, pair in enumerate(pairs):
         column = 0
         for token, positions in zip(pair, character_positions_in_vocabulary):
             id_table[row, column] = char2id(token, positions) + 1
             column += 1
     return id_table
Exemple #8
0
 def _next_batch_with_tokens(self):
     """Return the next batch of ids together with the raw tokens.

     Returns:
         Tuple ``(ids, tokens)``. ``tokens`` lists the entries of
         ``self._pairs`` found under the cursors; ``ids`` stacks, one row per
         batch entry, the single id ``char2id`` assigns to each token.

     Side effects:
         Every cursor is advanced by one, wrapping at
         ``self._number_of_pairs``.
     """
     tokens = []
     id_rows = []
     for slot in range(self._batch_size):
         position = self._cursor[slot]
         token = self._pairs[position]
         tokens.append(token)
         token_id = char2id(token, self.character_positions_in_vocabulary)
         id_rows.append(np.array([token_id]))
         self._cursor[slot] = (position + 1) % self._number_of_pairs
     return np.stack(id_rows), tokens
Exemple #9
0
 def _next_batch_with_tokens(self):
     """Return the next one-hot batch together with the raw tokens.

     Returns:
         Tuple ``(batch, tokens)``. ``tokens`` lists the entries of
         ``self._pairs`` found under the cursors; ``batch`` is a float64
         array of shape ``(batch_size, vocabulary_size)`` whose row ``b`` is
         one-hot at the id ``char2id`` assigns to ``tokens[b]``.

     Side effects:
         Every cursor is advanced by one, wrapping at
         ``self._number_of_pairs``.
     """
     # np.float was an alias of the builtin float (float64) and was removed
     # in NumPy 1.24; np.float64 yields the same dtype on every version.
     batch = np.zeros(shape=(self._batch_size, self._vocabulary_size),
                      dtype=np.float64)
     tokens = []
     for b in range(self._batch_size):
         pos = self._cursor[b]
         token = self._pairs[pos]
         tokens.append(token)
         batch[b, char2id(token, self.character_positions_in_vocabulary)] = 1.0
         self._cursor[b] = (pos + 1) % self._number_of_pairs
     return batch, tokens
def char2vec(char, character_positions_in_vocabulary, speaker_idx,
             speaker_flag_size):
    """Encode one character plus speaker information as a row vector.

    Layout of the returned ``(1, voc_size + speaker_flag_size + 1)`` float32
    array, where ``voc_size = len(character_positions_in_vocabulary)``:

    * columns ``[0, voc_size)``: one-hot of ``char`` (index from ``char2id``);
    * columns ``[voc_size, voc_size + speaker_flag_size)``: 1.0 at offset
      ``speaker_idx``;
    * last column: 1.0 only when ``speaker_idx`` is greater than zero.
    """
    vocabulary_size = len(character_positions_in_vocabulary)
    last_column = vocabulary_size + speaker_flag_size
    encoded = np.zeros(shape=(1, last_column + 1), dtype=np.float32)

    char_column = char2id(char, character_positions_in_vocabulary)
    encoded[0, char_column] = 1.0
    encoded[0, vocabulary_size + speaker_idx] = 1.0
    if speaker_idx > 0:
        encoded[0, last_column] = 1.
    return encoded
Exemple #11
0
def char2vec(character_positions_in_vocabulary,
             char,
             speaker_flag_size=2,
             speaker_idx=0,
             bot_answer_flag=0,
             eod=False):
    """Encode one character plus dialogue flags as a row vector.

    Layout of the returned ``(1, voc_size + speaker_flag_size + 2)`` float32
    array, where ``voc_size = len(character_positions_in_vocabulary)``:

    * columns ``[0, voc_size)``: one-hot of ``char`` (index from ``char2id``);
    * columns ``[voc_size, voc_size + speaker_flag_size)``: 1.0 at offset
      ``speaker_idx``;
    * next column: ``float(bot_answer_flag)``;
    * last column: ``float(eod)``.
    """
    vocabulary_size = len(character_positions_in_vocabulary)
    bot_answer_column = vocabulary_size + speaker_flag_size
    eod_column = bot_answer_column + 1
    encoded = np.zeros(shape=(1, eod_column + 1), dtype=np.float32)

    char_column = char2id(char, character_positions_in_vocabulary)
    encoded[0, char_column] = 1.0
    encoded[0, vocabulary_size + speaker_idx] = 1.0
    encoded[0, bot_answer_column] = float(bot_answer_flag)
    encoded[0, eod_column] = float(eod)
    return encoded
 def _start_batch(self):
     """Return the (inputs, labels) pair used to start a sequence.

     Every row encodes the newline character, has speaker column 1 set and a
     bot-speaks flag of 0.

     Returns:
         Tuple ``(start_inputs, start_labels)`` of float32 arrays.
         ``start_inputs`` is the column-wise concatenation of the character
         one-hot, the speaker one-hot and the bot-speaks flag;
         ``start_labels`` is the character one-hot followed by the
         bot-speaks flag.
     """
     base = np.zeros(shape=(self._batch_size, self._vocabulary_size),
                     dtype=np.float32)
     # Stays all-zero: the bot-speaks flag is 0 for every row.
     bot_speaks_flags = np.zeros(shape=(self._batch_size, 1),
                                 dtype=np.float32)
     # Explicit float32: without a dtype this array defaulted to float64 and
     # silently upcast start_inputs, while start_labels remained float32.
     speaker_flags = np.zeros(shape=(self._batch_size,
                                     self._number_of_speakers),
                              dtype=np.float32)
     # The newline id is identical for every row: look it up once.
     newline_id = char2id('\n', self._character_positions_in_vocabulary)
     base[:, newline_id] = 1.0
     speaker_flags[:, 1] = 1.0
     start_inputs = np.concatenate((base, speaker_flags, bot_speaks_flags),
                                   1)
     start_labels = np.concatenate((base, bot_speaks_flags), 1)
     return start_inputs, start_labels
Exemple #13
0
 def _start_batch(self):
     """Return the start batch: a newline word block plus punctuation blocks.

     Returns:
         float64 array built by concatenating along axis 1:

         * a ``(batch_size, self._vocabulary_sizes[0])`` block that is
           one-hot at the id ``char2id`` assigns to ``'\\n'``;
         * ``MAX_NUM_PUNCTUATION_MARKS`` copies of a
           ``(batch_size, self._vocabulary_sizes[1] + 1)`` block that is
           one-hot at column 0 (the original variable name suggests this
           column stands for "no punctuation mark").
     """
     # np.float was an alias of the builtin float (float64) and was removed
     # in NumPy 1.24; np.float64 yields the same dtype on every version.
     word_batch = np.zeros(shape=(self._batch_size,
                                  self._vocabulary_sizes[0]),
                           dtype=np.float64)
     # The newline id is identical for every row: look it up once.
     newline_id = char2id('\n', self.character_positions_in_vocabulary[0])
     word_batch[:, newline_id] = 1.0

     no_punc_batch = np.zeros(shape=(self._batch_size,
                                     self._vocabulary_sizes[1] + 1),
                              dtype=np.float64)
     no_punc_batch[:, 0] = 1.0
     return np.concatenate(
         tuple([word_batch] + [no_punc_batch] * MAX_NUM_PUNCTUATION_MARKS),
         axis=1)
Exemple #14
0
 def _next_batch(self):
     """Build one (inputs, labels) batch and move every cursor forward.

     For each batch row the character under that row's cursor is one-hot
     encoded, the speaker flag stored for that position is one-hot encoded,
     and the bot-answer and end-of-dialogue flags for that position are
     copied as floats.

     Returns:
         Tuple ``(inputs, labels)`` of float32 arrays. ``inputs`` is the
         column-wise concatenation of the character one-hot, the speaker
         one-hot, the bot-answer flag and the eod flag; ``labels`` is the
         character one-hot followed by the bot-answer flag.

     Side effects:
         Each cursor is advanced by one, wrapping at ``self._text_size``.
     """
     batch_size = self._batch_size
     char_one_hot = np.zeros(shape=(batch_size, self._vocabulary_size),
                             dtype=np.float32)
     speaker_one_hot = np.zeros(shape=(batch_size, self._number_of_speakers),
                                dtype=np.float32)
     answer_flags = np.zeros(shape=(batch_size, 1), dtype=np.float32)
     end_flags = np.zeros(shape=(batch_size, 1), dtype=np.float32)

     for row in range(batch_size):
         position = self._cursor[row]
         char_id = char2id(self._text[position],
                           self._character_positions_in_vocabulary)
         char_one_hot[row, char_id] = 1.0
         speaker_one_hot[row, self._speaker_flags[position]] = 1.0
         end_flags[row, 0] = float(self._eod_flags[position])
         answer_flags[row, 0] = float(self._bot_answer_flags[position])
         self._cursor[row] = (position + 1) % self._text_size

     inputs = np.concatenate(
         (char_one_hot, speaker_one_hot, answer_flags, end_flags), 1)
     labels = np.concatenate((char_one_hot, answer_flags), 1)
     return inputs, labels
Exemple #15
0
 def _start_batch(self):
     """Return a batch in which every row is the id of the newline character.

     The result is ``np.array`` applied to ``batch_size`` single-element
     rows, each holding ``char2id('\\n', ...)``.
     """
     id_rows = []
     for _ in range(self._batch_size):
         newline_id = char2id('\n', self.character_positions_in_vocabulary)
         id_rows.append([newline_id])
     return np.array(id_rows)
Exemple #16
0
 def _create_id_array(pairs, character_positions_in_vocabulary):
     """Map every element of ``pairs`` to its id.

     Args:
         pairs: sized sequence of items accepted by ``char2id``.
         character_positions_in_vocabulary: lookup object passed to
             ``char2id`` unchanged.

     Returns:
         One-dimensional int16 array with ``len(pairs)`` entries, the i-th
         being ``char2id(pairs[i], character_positions_in_vocabulary)``.
     """
     # Uninitialised buffer is fine: every slot is written in the loop below.
     id_array = np.empty(shape=(len(pairs),), dtype=np.int16)
     for index, item in enumerate(pairs):
         id_array[index] = char2id(item, character_positions_in_vocabulary)
     return id_array
Exemple #17
0
 def char2vec(char, characters_positions_in_vocabulary, speaker_idx, speaker_flag_size):
     """Return the id of ``char`` as an array of shape ``(1, 1, 1)``.

     ``speaker_idx`` and ``speaker_flag_size`` are accepted but not used by
     this implementation.
     """
     char_id = char2id(char, characters_positions_in_vocabulary)
     return np.array([char_id]).reshape((1, 1, 1))
Exemple #18
0
def char_2_base_vec(character_positions_in_vocabulary, char):
    """Return the one-hot encoding of ``char`` as a ``(1, voc_size)`` row.

    ``voc_size`` is ``len(character_positions_in_vocabulary)``; the hot
    column is the index ``char2id`` returns for ``char``. The array is
    float32.
    """
    vocabulary_size = len(character_positions_in_vocabulary)
    one_hot = np.zeros(shape=(1, vocabulary_size), dtype=np.float32)
    hot_column = char2id(char, character_positions_in_vocabulary)
    one_hot[0, hot_column] = 1.0
    return one_hot
Exemple #19
0
 def _start_batch(self):
     """Return the start batch of ids: newline id followed by zeros.

     Every one of the ``batch_size`` rows consists of the id ``char2id``
     assigns to ``'\\n'`` (looked up in
     ``self.character_positions_in_vocabulary[0]``) followed by
     ``MAX_NUM_PUNCTUATION_MARKS`` zeros.
     """
     zero_padding = [0] * MAX_NUM_PUNCTUATION_MARKS
     id_rows = []
     for _ in range(self._batch_size):
         newline_id = char2id('\n', self.character_positions_in_vocabulary[0])
         id_rows.append([newline_id] + zero_padding)
     return np.array(id_rows)