def vectorize(questions, answers, chars=None):
    """Vectorize the questions and expected answers.

    Each sentence becomes a boolean matrix with one row per character
    position and one column per entry of ``chars``; the cell selected by
    ``ctable.char_indices`` is set for every character of the sentence.

    Args:
        questions: list of input strings. NOTE: the list is emptied by this
            call (items are removed with ``pop()`` as they are encoded).
        answers: list of target strings, consumed the same way.
        chars: alphabet handed to ``CharacterTable``; falls back to the
            module-level ``CHARS`` when falsy.

    Returns:
        ``(X_train, X_val, y_train, y_val, y_maxlen, ctable)`` where the
        last tenth of the rows is held out as the validation split.

    Raises:
        ValueError: if ``questions`` or ``answers`` is empty (from ``max``).
    """
    print('Vectorization...')
    chars = chars or CHARS
    x_maxlen = max(len(question) for question in questions)
    y_maxlen = max(len(answer) for answer in answers)
    len_of_questions = len(questions)
    len_of_answers = len(answers)
    ctable = CharacterTable(chars)

    print("X = np_zeros")
    # Builtin bool instead of the np.bool alias, which newer NumPy removed.
    X = np_zeros((len_of_questions, x_maxlen, len(chars)), dtype=bool)
    print("for i, sentence in enumerate(questions):")
    # pop() takes from the end, so row i holds the i-th sentence counted from
    # the back; answers are consumed the same way, keeping X and y aligned.
    for i in range(len_of_questions):
        sentence = questions.pop()
        for j, c in enumerate(sentence):
            X[i, j, ctable.char_indices[c]] = 1

    print("y = np_zeros")
    y = np_zeros((len_of_questions, y_maxlen, len(chars)), dtype=bool)
    print("for i, sentence in enumerate(answers):")
    for i in range(len_of_answers):
        sentence = answers.pop()
        for j, c in enumerate(sentence):
            y[i, j, ctable.char_indices[c]] = 1

    # Explicitly set apart 10% for validation data that we never train over.
    # Floor division keeps split_at an int under true division as well.
    split_at = len(X) - len(X) // 10
    (X_train, X_val) = (slice_X(X, 0, split_at), slice_X(X, split_at))
    (y_train, y_val) = (y[:split_at], y[split_at:])

    print(X_train.shape)
    print(y_train.shape)

    return X_train, X_val, y_train, y_val, y_maxlen, ctable
def train(self, data_iterator):
    '''
    Train a keras model on a worker and send asynchronous updates
    to parameter server.

    data_iterator yields (x, y) pairs; they are materialized into two
    arrays before training. With frequency 'epoch' the server weights are
    fetched and the deltas pushed once per epoch; with frequency 'batch'
    this happens once per mini-batch. Training itself is skipped when the
    partition holds no more than batch_size samples.

    This is a generator: it yields a single empty list when done, and
    yields nothing at all for an empty partition.
    '''
    # One pass over the iterator; both arrays are built from the same list.
    samples = list(data_iterator)
    x_train = np.asarray([x for x, _ in samples])
    y_train = np.asarray([y for _, y in samples])

    if x_train.size == 0:
        return
    model = model_from_yaml(self.yaml)

    nb_epoch = self.train_config['nb_epoch']
    batch_size = self.train_config.get('batch_size')
    # Number of samples is the leading axis; len(x_train[0]) would be the
    # length of a single sample and produce wrong batch boundaries.
    nb_train_sample = x_train.shape[0]
    nb_batch = int(np.ceil(nb_train_sample / float(batch_size)))
    index_array = np.arange(nb_train_sample)
    batches = [(i * batch_size, min(nb_train_sample, (i + 1) * batch_size))
               for i in range(nb_batch)]

    if self.frequency == 'epoch':
        # fit() runs one epoch per server round-trip. Use a copy so the
        # worker's own train_config keeps the configured nb_epoch.
        fit_config = dict(self.train_config, nb_epoch=1)
        for _ in range(nb_epoch):
            weights_before_training = get_server_weights(self.master_url)
            model.set_weights(weights_before_training)
            if x_train.shape[0] > batch_size:
                model.fit(x_train, y_train, show_accuracy=True, **fit_config)
            weights_after_training = model.get_weights()
            deltas = subtract_params(weights_before_training, weights_after_training)
            put_deltas_to_server(deltas, self.master_url)
    elif self.frequency == 'batch':
        for _ in range(nb_epoch):
            if x_train.shape[0] > batch_size:
                for (batch_start, batch_end) in batches:
                    weights_before_training = get_server_weights(self.master_url)
                    model.set_weights(weights_before_training)
                    batch_ids = index_array[batch_start:batch_end]
                    X = slice_X(x_train, batch_ids)
                    y = slice_X(y_train, batch_ids)
                    model.train_on_batch(X, y)
                    weights_after_training = model.get_weights()
                    deltas = subtract_params(weights_before_training, weights_after_training)
                    put_deltas_to_server(deltas, self.master_url)
    else:
        print('Choose frequency to be either batch or epoch')
    yield []
def train(self, data_iterator):
    '''
    Train a keras model on a worker and send asynchronous updates
    to parameter server.

    data_iterator yields (x, y) pairs; they are materialized into two
    arrays before training. With frequency 'epoch' the server weights are
    fetched and the deltas pushed once per epoch; with frequency 'batch'
    this happens once per mini-batch. Training itself is skipped when the
    partition holds no more than batch_size samples.

    This is a generator: it yields a single empty list when done, and
    yields nothing at all for an empty partition.
    '''
    # One pass over the iterator; both arrays are built from the same list.
    samples = list(data_iterator)
    x_train = np.asarray([x for x, _ in samples])
    y_train = np.asarray([y for _, y in samples])

    if x_train.size == 0:
        return
    model = model_from_yaml(self.yaml)

    nb_epoch = self.train_config['nb_epoch']
    batch_size = self.train_config.get('batch_size')
    # Number of samples is the leading axis; len(x_train[0]) would be the
    # length of a single sample and produce wrong batch boundaries.
    nb_train_sample = x_train.shape[0]
    nb_batch = int(np.ceil(nb_train_sample / float(batch_size)))
    index_array = np.arange(nb_train_sample)
    batches = [(i * batch_size, min(nb_train_sample, (i + 1) * batch_size))
               for i in range(nb_batch)]

    if self.frequency == 'epoch':
        # fit() runs one epoch per server round-trip. Use a copy so the
        # worker's own train_config keeps the configured nb_epoch.
        fit_config = dict(self.train_config, nb_epoch=1)
        for _ in range(nb_epoch):
            weights_before_training = get_server_weights(self.master_url)
            model.set_weights(weights_before_training)
            if x_train.shape[0] > batch_size:
                model.fit(x_train, y_train, show_accuracy=True, **fit_config)
            weights_after_training = model.get_weights()
            deltas = subtract_params(weights_before_training, weights_after_training)
            put_deltas_to_server(deltas, self.master_url)
    elif self.frequency == 'batch':
        for _ in range(nb_epoch):
            if x_train.shape[0] > batch_size:
                for (batch_start, batch_end) in batches:
                    weights_before_training = get_server_weights(self.master_url)
                    model.set_weights(weights_before_training)
                    batch_ids = index_array[batch_start:batch_end]
                    X = slice_X(x_train, batch_ids)
                    y = slice_X(y_train, batch_ids)
                    model.train_on_batch(X, y)
                    weights_after_training = model.get_weights()
                    deltas = subtract_params(weights_before_training, weights_after_training)
                    put_deltas_to_server(deltas, self.master_url)
    else:
        print('Choose frequency to be either batch or epoch')
    yield []
def predict_loop(model, data, batch_size=128, callbacks=None, log=print, f=None):
    """Run ``f`` over ``data`` in mini-batches and collect the outputs.

    Args:
        model: object providing ``input_order``, ``output_order`` and, when
            ``f`` is not given, ``_predict``.
        data: mapping from input name to an indexable of samples.
        batch_size: number of samples per call to ``f``.
        callbacks: optional iterable of callables invoked before every batch
            as ``callback(batch_index, nb_batch)``.
        log: callable receiving progress messages, or None to disable them.
        f: prediction function called as ``f(*inputs_batch)``; defaults to
            ``model._predict``.

    Returns:
        dict mapping each name in ``model.output_order`` to an array with one
        row per sample. The arrays are allocated with ``np.zeros`` using its
        default dtype. The dict is empty when there are no samples.
    """
    if f is None:
        f = model._predict
    if callbacks is None:
        # None rather than a shared mutable list as the default.
        callbacks = ()
    ins = [data[name] for name in model.input_order]
    nb_sample = len(ins[0])
    outs = []
    batches = km.make_batches(nb_sample, batch_size)
    index_array = np.arange(nb_sample)
    nb_batch = len(batches)
    for batch_index, (batch_start, batch_end) in enumerate(batches):
        if log is not None:
            s = progress(batch_index, nb_batch)
            if s is not None:
                log(s)
        for callback in callbacks:
            callback(batch_index, nb_batch)
        batch_ids = list(index_array[batch_start:batch_end])
        ins_batch = km.slice_X(ins, batch_ids)
        batch_outs = f(*ins_batch)
        # A single output is wrapped so both cases share the code below.
        if not isinstance(batch_outs, list):
            batch_outs = [batch_outs]
        if batch_index == 0:
            # Output buffers are sized from the first batch's trailing dims.
            for batch_out in batch_outs:
                shape = (nb_sample,) + batch_out.shape[1:]
                outs.append(np.zeros(shape))
        for i, batch_out in enumerate(batch_outs):
            outs[i][batch_start:batch_end] = batch_out
    return dict(zip(model.output_order, outs))
def predict_loop(model, data, batch_size=128, callbacks=None, log=print, f=None):
    """Run ``f`` over ``data`` in mini-batches and collect the outputs.

    Args:
        model: object providing ``input_order``, ``output_order`` and, when
            ``f`` is not given, ``_predict``.
        data: mapping from input name to an indexable of samples.
        batch_size: number of samples per call to ``f``.
        callbacks: optional iterable of callables invoked before every batch
            as ``callback(batch_index, nb_batch)``.
        log: callable receiving progress messages, or None to disable them.
        f: prediction function called as ``f(*inputs_batch)``; defaults to
            ``model._predict``.

    Returns:
        dict mapping each name in ``model.output_order`` to an array with one
        row per sample. The arrays are allocated with ``np.zeros`` using its
        default dtype. The dict is empty when there are no samples.
    """
    if f is None:
        f = model._predict
    if callbacks is None:
        # None rather than a shared mutable list as the default.
        callbacks = ()
    ins = [data[name] for name in model.input_order]
    nb_sample = len(ins[0])
    outs = []
    batches = km.make_batches(nb_sample, batch_size)
    index_array = np.arange(nb_sample)
    nb_batch = len(batches)
    for batch_index, (batch_start, batch_end) in enumerate(batches):
        if log is not None:
            s = progress(batch_index, nb_batch)
            if s is not None:
                log(s)
        for callback in callbacks:
            callback(batch_index, nb_batch)
        batch_ids = list(index_array[batch_start:batch_end])
        ins_batch = km.slice_X(ins, batch_ids)
        batch_outs = f(*ins_batch)
        # A single output is wrapped so both cases share the code below.
        if not isinstance(batch_outs, list):
            batch_outs = [batch_outs]
        if batch_index == 0:
            # Output buffers are sized from the first batch's trailing dims.
            for batch_out in batch_outs:
                shape = (nb_sample,) + batch_out.shape[1:]
                outs.append(np.zeros(shape))
        for i, batch_out in enumerate(batch_outs):
            outs[i][batch_start:batch_end] = batch_out
    return dict(zip(model.output_order, outs))
def write_loop(ins, fun, write_fun, batch_size=128, callbacks=None, log=print):
    """Apply ``fun`` to ``ins`` in mini-batches, handing each result to ``write_fun``.

    Args:
        ins: list of input collections; the sample count is ``len(ins[0])``.
        fun: function called as ``fun(*inputs_batch)``.
        write_fun: sink called as ``write_fun(batch_outs, batch_start, batch_end)``
            after every batch.
        batch_size: number of samples per call to ``fun``.
        callbacks: optional iterable of callables invoked before every batch
            as ``callback(batch_index, nb_batch)``.
        log: callable receiving progress messages, or None to disable them.

    Returns:
        None; results leave only through ``write_fun``.
    """
    if callbacks is None:
        # None rather than a shared mutable list as the default.
        callbacks = ()
    nb_sample = len(ins[0])
    batches = km.make_batches(nb_sample, batch_size)
    index_array = np.arange(nb_sample)
    nb_batch = len(batches)
    for batch_index, (batch_start, batch_end) in enumerate(batches):
        if log is not None:
            s = progress(batch_index, nb_batch)
            if s is not None:
                log(s)
        for callback in callbacks:
            callback(batch_index, nb_batch)
        batch_ids = list(index_array[batch_start:batch_end])
        ins_batch = km.slice_X(ins, batch_ids)
        batch_outs = fun(*ins_batch)
        write_fun(batch_outs, batch_start, batch_end)
# Script excerpt: encode the question/answer strings into boolean arrays,
# shuffle them, hold out a validation split, then start building the model.
print('Vectorization...')
# One row per question; the last axis has one slot per entry of `chars`.
# NOTE(review): the `np.bool` alias was removed in newer NumPy releases;
# plain `bool` is the portable spelling - confirm the targeted NumPy version.
X = np.zeros((len(questions), MAXLEN, len(chars)), dtype=np.bool)
y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=np.bool)
for i, sentence in enumerate(questions):
    X[i] = ctable.encode(sentence, maxlen=MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, maxlen=DIGITS + 1)

# Shuffle (X, y) in unison as the later parts of X will almost all be larger digits
indices = np.arange(len(y))
np.random.shuffle(indices)
X = X[indices]
y = y[indices]

# Explicitly set apart 10% for validation data that we never train over
# NOTE(review): `/` yields a float under Python 3 (or with
# `from __future__ import division`), which is not a valid slice index;
# `//` would be safe on both - confirm the interpreter this runs under.
split_at = len(X) - len(X) / 10
(X_train, X_val) = (slice_X(X, 0, split_at), slice_X(X, split_at))
(y_train, y_val) = (y[:split_at], y[split_at:])

print(X_train.shape)
print(y_train.shape)

print('Build model...')
model = Sequential()
# "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
# note: in a situation where your input sequences have a variable length,
# use input_shape=(None, nb_feature).
model.add(RNN(HIDDEN_SIZE, input_shape=(None, len(chars))))
# For the decoder's input, we repeat the encoded input for each time step
model.add(RepeatVector(DIGITS + 1))
# The decoder RNN could be multiple layers stacked or a single layer
# (the loop body lies beyond this excerpt)
for _ in xrange(LAYERS):
# Script excerpt: encode the question/answer strings into boolean arrays,
# shuffle them, hold out a validation split, then start building the model.
# One row per question; the last axis has one slot per entry of `chars`.
# NOTE(review): the `np.bool` alias was removed in newer NumPy releases;
# plain `bool` is the portable spelling - confirm the targeted NumPy version.
X = np.zeros((len(questions), MAXLEN, len(chars)), dtype=np.bool)
y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=np.bool)
for i, sentence in enumerate(questions):
    X[i] = ctable.encode(sentence, maxlen=MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, maxlen=DIGITS + 1)

# Shuffle (X, y) in unison as the later parts of X will almost all be larger digits
indices = np.arange(len(y))
np.random.shuffle(indices)
X = X[indices]
y = y[indices]

# Explicitly set apart 10% for validation data that we never train over
# NOTE(review): `/` yields a float under Python 3 (or with
# `from __future__ import division`), which is not a valid slice index;
# `//` would be safe on both - confirm the interpreter this runs under.
split_at = len(X) - len(X) / 10
(X_train, X_val) = (slice_X(X, 0, split_at), slice_X(X, split_at))
(y_train, y_val) = (y[:split_at], y[split_at:])

print(X_train.shape)
print(y_train.shape)

print("Build model...")
model = Sequential()
# "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE
# note: in a situation where your input sequences have a variable length,
# use input_shape=(None, nb_feature).
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
# For the decoder's input, we repeat the encoded input for each time step
model.add(RepeatVector(DIGITS + 1))
# The decoder RNN could be multiple layers stacked or a single layer
# (the loop body lies beyond this excerpt)
for _ in range(LAYERS):
# Script excerpt: encode the generated questions/answers, shuffle them and
# hold out the last tenth as a validation split.
print("Total addition questions:", len(questions))

print("Vectorization...")
convertor = CharacterDataEngine(engine.get_character_set(), maxlen=MAXLEN)
D_X = convertor.encode_dataset(questions, invert=True)
D_y = convertor.encode_dataset(expected, maxlen=DIGITS + 1)

# Shuffle (X, y) in unison as the later parts of X will almost all be larger digits
indices = np.arange(len(D_y))
np.random.shuffle(indices)
D_X = D_X[indices]
D_y = D_y[indices]

# Explicitly set apart 10% for validation data that we never train over.
# Floor division keeps split_at an int; true division would produce a float,
# which is not a valid slice index.
split_at = len(D_X) - len(D_X) // 10
(D_X_train, D_X_val) = (slice_X(D_X, 0, split_at), slice_X(D_X, split_at))
(D_y_train, D_y_val) = (D_y[:split_at], D_y[split_at:])

print(D_X_train.shape)
print(D_y_train.shape)

# Imports kept where the original script places them.
import lx_layer.layer as L
import lx_layer.recurrent as R
import theano.printing as P
from keras import activations, objectives
from keras import models
from keras.optimizers import Adam
from keras import backend as K
from util import initializations