def train(self, report_freq=500, as_policy=True):
    #loss = C.ops.minus(0, C.ops.argmin(self.model) - C.ops.argmin(self.model) + C.ops.minus(self.label_var, 0))
    loss = C.squared_error(self.model, self.label_var)
    evaluation = C.squared_error(self.model, self.label_var)
    schedule = C.momentum_schedule(self.hp.learning_rate)
    progress_printer = C.logging.ProgressPrinter(num_epochs=self.hp.epochs / self.hp.minibatch_size)
    learner = C.adam(self.model.parameters,
                     C.learning_rate_schedule(self.hp.learning_rate, C.UnitType.minibatch),
                     momentum=schedule,
                     l1_regularization_weight=self.hp.l1reg,
                     l2_regularization_weight=self.hp.l2reg)
    trainer = C.Trainer(self.model, (loss, evaluation), learner, progress_printer)

    self.plotdata = {"loss": []}
    for epoch in range(self.hp.epochs):
        indata, label, total_reward = self.get_next_data(self.hp.minibatch_size, as_policy)
        data = {self.input_var: indata, self.label_var: label}
        trainer.train_minibatch(data)

        loss = trainer.previous_minibatch_loss_average
        if not (loss == "NA"):
            self.plotdata["loss"].append(loss)

        if epoch % report_freq == 0:
            print()
            print("last epoch total reward: {}".format(total_reward))
            trainer.summarize_training_progress()
            print()
        # if self.hp.stop_loss > loss:
        #     break

    print()
    trainer.summarize_training_progress()
def train(streamf):
    input_var = cntk.input_variable(45, np.float32, name='features',
                                    dynamic_axes=cntk.axis.Axis.default_input_variable_dynamic_axes())
    label_var = cntk.input_variable(3, np.float32, name='labels')
    net = nn(input_var)

    loss = cntk.squared_error(net, label_var)
    error = cntk.squared_error(net, label_var)

    learning_rate = 0.02
    lr_schedule = cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch)
    momentum_time_constant = cntk.momentum_as_time_constant_schedule(5000 / -np.math.log(0.9))
    learner = cntk.fsadagrad(net.parameters, lr=lr_schedule,
                             momentum=momentum_time_constant, unit_gain=True)
    progres = cntk.logging.ProgressPrinter(0)
    trainer = cntk.Trainer(net, (loss, error), [learner], progress_writers=progres)

    input_map = {
        input_var: streamf.streams.features,
        label_var: streamf.streams.labels
    }

    minibatch_size = 5000
    num_samples_per_sweep = 2000
    for i in range(0, num_samples_per_sweep):
        dat1 = streamf.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(dat1)
        training_loss = trainer.previous_minibatch_loss_average
        eval_error = trainer.previous_minibatch_evaluation_average
        if training_loss < 0.002:
            break
    return trainer
def train(streamf):
    global net
    net = nn(input_var)

    loss = cntk.squared_error(net, label_var)
    error = cntk.squared_error(net, label_var)

    learning_rate = 0.001
    lr_schedule = cntk.learning_rate_schedule(learning_rate, cntk.UnitType.minibatch)
    momentum_time_constant = cntk.momentum_as_time_constant_schedule(700)
    learner = cntk.fsadagrad(net.parameters, lr=lr_schedule,
                             momentum=momentum_time_constant, unit_gain=True)
    progres = cntk.logging.ProgressPrinter(0)
    trainer = cntk.Trainer(net, (loss, error), [learner], progress_writers=progres)

    input_map = {
        input_var: streamf.streams.features,
        label_var: streamf.streams.labels
    }

    minibatch_size = 512
    max_epochs = 100
    epoch_size = 48985

    t = 0
    for epoch in range(max_epochs):
        epoch_end = (epoch + 1) * epoch_size
        while t < epoch_end:
            dat1 = streamf.next_minibatch(minibatch_size, input_map=input_map)
            trainer.train_minibatch(dat1)
            t += dat1[label_var].num_samples
        trainer.summarize_training_progress()
    return trainer
def create_model(self):
    hidden_layers = [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 16, 32]

    first_input = C.ops.splice(self._input, self._target)
    first_input_size = first_input.shape
    first_input = C.ops.reshape(
        first_input, (first_input_size[0], 1, first_input_size[1]))

    model = C.layers.Convolution2D((1, 3),
                                   num_filters=8,
                                   pad=True,
                                   reduction_rank=1,
                                   activation=C.ops.tanh)(first_input)
    print(model)
    for h in hidden_layers:
        input_new = C.ops.splice(model, first_input, axis=0)
        model = C.layers.Convolution2D((1, 3),
                                       num_filters=h,
                                       pad=True,
                                       reduction_rank=1,
                                       activation=C.ops.tanh)(input_new)
        print(model)

    ######
    #model = C.ops.splice(model, self._target)
    # Dense layers
    direction = C.layers.Sequential([
        C.layers.Dense(720, activation=C.ops.relu),
        C.layers.Dense(360, activation=None)
    ])(model)
    velocity = C.layers.Sequential([
        C.layers.Dense(128, activation=C.ops.relu),
        C.layers.Dense(64, activation=None),
        C.layers.Dense(1, activation=None)
    ])(model)
    model = C.ops.splice(direction, velocity)

    if self._load_model:
        model = C.load_model(self._file_name)
        direction = model[0:360]
        velocity = model[360]

    C.logging.log_number_of_parameters(model)
    print(model)

    #loss = C.squared_error(direction, self._output) + C.squared_error(velocity, self._output_velocity)
    #error = C.squared_error(direction, self._output) + C.squared_error(velocity, self._output_velocity)
    loss = C.cross_entropy_with_softmax(
        direction, self._output) + C.squared_error(velocity, self._output_velocity)
    error = C.classification_error(direction, self._output) + C.squared_error(
        velocity, self._output_velocity)

    learner = C.adadelta(model.parameters, l2_regularization_weight=0.001)
    progress_printer = C.logging.ProgressPrinter(tag='Training')
    trainer = C.Trainer(model, (loss, error), learner, progress_printer)
    return model, loss, learner, trainer
def create_model(self):
    hidden_layers = [8, 8, 8, 8, 8, 8, 8, 8, 8]

    #first_input = C.ops.reshape(
    #    C.ops.splice(self._input, self._target),
    #    (1, self._input_size[0]*2, self._input_size[1]))
    #print(first_input)
    first_input = C.ops.reshape(
        self._input, (1, self._input_size[0], self._input_size[1]))

    model = C.layers.Convolution2D((1, 3),
                                   num_filters=8,
                                   pad=True,
                                   reduction_rank=1,
                                   activation=C.ops.tanh)(first_input)
    for h in hidden_layers:
        input_new = C.ops.splice(model, first_input)
        model = C.layers.Convolution2D((1, 3),
                                       num_filters=h,
                                       pad=True,
                                       reduction_rank=1,
                                       activation=C.ops.tanh)(input_new)

    model = C.ops.splice(model, self._target)

    ######
    # Dense layers
    direction = C.layers.Sequential([
        C.layers.Dense(256, activation=C.ops.relu),
        C.layers.Dense(128, activation=C.ops.relu),
        C.layers.Dense(64, activation=C.ops.relu),
        C.layers.Dense(32, activation=None),
    ])(model)
    velocity = C.layers.Sequential([
        C.layers.Dense(128, activation=C.ops.relu),
        C.layers.Dense(64, activation=None),
        C.layers.Dense(1, activation=None)
    ])(model)
    model = C.ops.splice(direction, velocity)
    print(model)

    loss = C.cross_entropy_with_softmax(
        direction, self._output) + C.squared_error(velocity, self._output_velocity)
    error = C.classification_error(direction, self._output) + C.squared_error(
        velocity, self._output_velocity)

    learner = C.adadelta(model.parameters)
    progress_printer = C.logging.ProgressPrinter(tag='Training', freq=50)
    trainer = C.Trainer(model, (loss, error), learner, progress_printer)
    return model, loss, learner, trainer
def train(self):
    tmp_d = {"x": [], "y": []}
    num_list = []
    count = 0
    for idx, value in enumerate(self.series):
        if idx % self.h_dims == 0:
            num_list = []
            count += 1
            if (self.h_dims * count) > len(self.series):
                break
        num_list.append(np.float32(value))
        increment_list = []
        for num in num_list:
            increment_list.append(num)
        tmp_d["x"].append(np.array(increment_list))
        tmp_d["y"].append(
            np.array([np.float32(self.series[self.h_dims * count])]))

    x = {"train": tmp_d["x"]}
    y = {"train": np.array(tmp_d["y"])}

    z = self.create_model(self.input_node, self.h_dims)
    var_l = cntk.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")

    learning_rate = 0.005
    lr_schedule = cntk.learning_parameter_schedule(learning_rate)
    loss = cntk.squared_error(z, var_l)
    error = cntk.squared_error(z, var_l)
    momentum_schedule = cntk.momentum_schedule(
        0.9, minibatch_size=self.batch_size)
    learner = cntk.fsadagrad(z.parameters,
                             lr=lr_schedule,
                             momentum=momentum_schedule)
    trainer = cntk.Trainer(z, (loss, error), [learner])

    # training
    loss_summary = []
    start = time.time()
    for epoch in range(0, self.epochs):
        for x_batch, l_batch in self.next_batch(x, y, "train", self.batch_size):
            trainer.train_minibatch({
                self.input_node: x_batch,
                var_l: l_batch
            })
        if epoch % (self.epochs / 10) == 0:
            training_loss = trainer.previous_minibatch_loss_average
            loss_summary.append(training_loss)
            print("epoch: {}, loss: {:.4f} [time: {:.1f}s]".format(
                epoch, training_loss, time.time() - start))
    return z
def lstm_basic(x, y, epochs=1000, batch_size=100, input_dim=5):

    x_axes = [C.Axis.default_batch_axis(), C.Axis.default_dynamic_axis()]
    C.input_variable(1, dynamic_axes=x_axes)

    # input sequences
    input_seq = C.sequence.input_variable(1)

    # create the model
    z = create_model(input_seq, input_dim)

    # expected output (label), also the dynamic axes of the model output
    # is specified as the model of the label input
    lb = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")

    # the learning rate
    learning_rate = 0.02
    lr_schedule = C.learning_parameter_schedule(learning_rate)

    # loss function
    loss = C.squared_error(z, lb)

    # use squared error to determine error for now
    error = C.squared_error(z, lb)

    # use fsadagrad optimizer
    momentum_schedule = C.momentum_schedule(0.9, minibatch_size=batch_size)
    learner = C.fsadagrad(z.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule,
                          unit_gain=True)
    trainer = C.Trainer(z, (loss, error), [learner])

    # train
    loss_summary = []
    start = time.time()
    for epoch in range(0, epochs):
        for x1, y1 in next_batch(x, y, "train", batch_size):
            trainer.train_minibatch({input_seq: x1, lb: y1})
        if epoch % (epochs / 10) == 0:
            training_loss = trainer.previous_minibatch_loss_average
            loss_summary.append(training_loss)
            print("epoch: {}, loss: {:.4f} [time: {:.1f}s]".format(
                epoch, training_loss, time.time() - start))

    print("training took {0:.1f} sec".format(time.time() - start))
    return z, trainer, input_seq
def test_composite_source_synced_transforms(tmpdir):
    from PIL import Image

    np.random.seed(1)
    tmpmap = str(tmpdir/'sync_test.map')
    with open(tmpmap, 'w') as f:
        for i in range(10):
            data = np.random.randint(0, 2**8, (224, 224, 3))
            image = Image.fromarray(data.astype('uint8'), "RGB")
            tmpjpg = str(tmpdir/('%d.jpg' % i))
            image.save(tmpjpg)
            f.write("%s\t0\n" % tmpjpg)

    def create_reader(map_file1, map_file2):
        transforms = [xforms.crop(crop_type='randomside', side_ratio=0.8, jitter_type='uniratio'),
                      xforms.scale(width=224, height=224, channels=3, interpolations='linear')]
        source1 = C.io.ImageDeserializer(map_file1, C.io.StreamDefs(
            source_image=C.io.StreamDef(field='image', transforms=transforms)))
        source2 = C.io.ImageDeserializer(map_file2, C.io.StreamDefs(
            target_image=C.io.StreamDef(field='image', transforms=transforms)))
        return C.io.MinibatchSource([source1, source2], max_samples=sys.maxsize,
                                    randomize=True, multithreaded_deserializer=False)

    x = C.input_variable((3, 224, 224))
    y = C.input_variable((3, 224, 224))
    loss = C.squared_error(x, y)
    reader = create_reader(tmpmap, tmpmap)
    minibatch_size = 2
    input_map = {
        x: reader.streams.source_image,
        y: reader.streams.target_image
    }
    for i in range(30):
        data = reader.next_minibatch(minibatch_size, input_map=input_map)
        assert np.allclose(loss.eval(data), np.zeros(minibatch_size))
def _build_network(self, pretrained_policy):
    self.image_frame = C.input_variable((self.num_frames_to_stack, ) +
                                        self.observation_space_shape)
    self.target_current_state_value = C.input_variable((1, ))

    if pretrained_policy is None:
        h = C.layers.Convolution2D(filter_shape=(7, 7),
                                   num_filters=32,
                                   strides=(4, 4),
                                   pad=True,
                                   name='conv_1',
                                   activation=C.relu)(self.image_frame)
        h = C.layers.Convolution2D(filter_shape=(5, 5),
                                   num_filters=64,
                                   strides=(2, 2),
                                   pad=True,
                                   name='conv_2',
                                   activation=C.relu)(h)
        h = C.layers.Convolution2D(filter_shape=(3, 3),
                                   num_filters=128,
                                   strides=(1, 1),
                                   pad=True,
                                   name='conv_3',
                                   activation=C.relu)(h)
        h = C.layers.Dense(64, activation=C.relu, name='dense_1')(h)
        self.value = C.layers.Dense(1, name='dense_2')(h)
    else:
        self.value = C.Function.load(pretrained_policy)(self.image_frame)

    self.loss = C.squared_error(self.target_current_state_value, self.value)
def create_model(self):
    hidden_layers = [8, 8, 8, 8, 8, 8, 8, 8, 8]
    first_input = C.ops.reshape(
        C.ops.splice(self._input, self._target),
        (1, self._input_size[0]*2, self._input_size[1]))
    print(first_input)
    model = C.layers.Convolution2D(
        (1, 3), num_filters=8, pad=True,
        reduction_rank=1, activation=C.ops.tanh)(first_input)
    print(model)
    for h in hidden_layers:
        input_new = C.ops.splice(model, first_input)
        model = C.layers.Convolution2D(
            (1, 3), num_filters=h, pad=True,
            reduction_rank=1, activation=C.ops.tanh)(input_new)
        print(model)

    ######
    # Dense layers
    direction = C.layers.Sequential([
        C.layers.Dense(256, activation=C.ops.relu),
        C.layers.Dense(128, activation=C.ops.relu),
        C.layers.Dense(64, activation=C.ops.relu),
        C.layers.Dense(32, activation=None),
    ])(model)
    velocity = C.layers.Sequential([
        C.layers.Dense(128, activation=C.ops.relu),
        C.layers.Dense(64, activation=None),
        C.layers.Dense(1, activation=None)
    ])(model)
    model = C.ops.splice(direction, velocity)

    if self._load_model:
        model = C.load_model('dnns/GRP_f.dnn')
        direction = model[0:32]
        velocity = model[32]

    print(model)

    loss = C.cross_entropy_with_softmax(direction, self._output) + C.squared_error(velocity, self._output_velocity)
    error = C.classification_error(direction, self._output) + C.squared_error(velocity, self._output_velocity)

    learner = C.adadelta(model.parameters, l2_regularization_weight=0.001)
    progress_printer = C.logging.ProgressPrinter(tag='Training')
    trainer = C.Trainer(model, (loss, error), learner, progress_printer)
    return model, loss, learner, trainer
def main():
    show_image = False
    sigma_r = 8
    grid_sz = 64

    if show_image:
        sz = 256
        n_chans = 3
        bs = 1
        data = skio.imread("/data/rgb.png").mean(2)[:sz, :sz].astype(
            np.float32)
        data = np.expand_dims(data / 255.0, 0)
        n_epochs = 1000
        lr = 0.001
    else:
        sz = 1024
        n_chans = 3
        bs = 4
        N = 4
        data = np.random.uniform(size=[N, sz, sz]).astype(np.float32)
        n_epochs = 50
        lr = 0.000000001

    imdata = np.tile(np.expand_dims(data, 1), [1, n_chans, 1, 1])

    im = C.input_variable([n_chans, sz, sz], needs_gradient=True)
    guide = C.input_variable([sz, sz], needs_gradient=True)
    guide_no_grad = C.input_variable([sz, sz], needs_gradient=False)
    model = BilateralSlice(sz, n_chans, n_chans, sigma_r=sigma_r,
                           grid_sz=grid_sz)
    out = model(im, guide, guide_no_grad)

    svg = C.logging.graph.plot(out, "/output/graph.svg")

    if show_image:
        # --- Show output ---------------------------------------------------
        inputs = {im: imdata[0], guide: data[0], guide_no_grad: data[0]}
        out_ = out.eval(inputs)
        out_ = np.clip(np.transpose(np.squeeze(out_), [1, 2, 0]), 0, 1)
        skio.imsave("/output/imout.png", out_)
    else:
        # --- Train -----------------------------------------------------------
        loss = C.squared_error(out, im)

        C.debugging.profiler.start_profiler("/output/pyprof")
        C.debugging.profiler.enable_profiler()

        learner = C.sgd(model.parameters, C.learning_parameter_schedule(lr))
        progress_writer = C.logging.ProgressPrinter(0)

        begin = time.time()
        summary = loss.train((imdata, data, data),
                             parameter_learners=[learner],
                             callbacks=[progress_writer],
                             max_epochs=n_epochs,
                             minibatch_size=bs)
        end = time.time()
        runtime = (end - begin) * 1000.0 / n_epochs
        print('Runtime:', runtime)

        C.debugging.profiler.stop_profiler()
def train(create_model, X, Y, epochs=500, batch_size=10, N=1):
    dim = Y.shape[1]

    # input sequences
    x = C.sequence.input_variable(dim)

    # create the model
    z = create_model(x, N=N, outputs=dim)

    # expected output (label), also the dynamic axes of the model output
    # is specified as the model of the label input
    l = C.input_variable(dim, dynamic_axes=z.dynamic_axes, name="y")

    # the learning rate
    learning_rate = 0.02
    lr_schedule = C.learning_parameter_schedule(learning_rate)

    # loss function
    loss = C.squared_error(z, l)

    # use squared error to determine error for now
    error = C.squared_error(z, l)

    # use fsadagrad optimizer
    momentum_schedule = C.momentum_schedule(0.9, minibatch_size=batch_size)
    learner = C.fsadagrad(z.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule,
                          unit_gain=True)
    trainer = C.Trainer(z, (loss, error), [learner])

    # train
    loss_summary = []
    start = time.time()
    for epoch in range(0, epochs):
        for x1, y1 in next_batch(X, Y, batch_size):
            trainer.train_minibatch({x: x1, l: y1})
        if epoch % (epochs / 10) == 0:
            training_loss = trainer.previous_minibatch_loss_average
            loss_summary.append(training_loss)
            print("epoch: {}, loss: {:.5f}".format(epoch, training_loss))

    print("training took {0:.1f} sec".format(time.time() - start))
    return z
def _build_network(self, pretrained_policy):
    self.input = C.input_variable(self.observation_space_shape)
    self.target_current_state_value = C.input_variable((1, ))

    if pretrained_policy is None:
        h = C.layers.Dense(64, activation=C.relu, name='dense_1')(self.input)
        h = C.layers.Dense(64, activation=C.relu, name='dense_2')(h)
        self.value = C.layers.Dense(1, name='dense_3')(h)
    else:
        self.value = C.Function.load(pretrained_policy)(self.input)

    self.loss = C.squared_error(self.target_current_state_value, self.value)
def __init__(self):
    self.X = C.input_variable(shape=(1, ))
    self.h = C.layers.Dense(1, activation=None, init=C.uniform(1),
                            bias=False)(self.X)
    self.pred = C.layers.Dense(1, activation=None, init=C.uniform(1),
                               bias=False)(self.h)
    self.y = C.input_variable(shape=(1, ))
    self.loss = C.squared_error(self.pred, self.y)
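# Hypothetical usage sketch for the two-layer linear model defined in the
# __init__ above; this is an added note, not part of the original source.
# Assumptions: the enclosing class is called Net here, and a plain SGD learner
# with learning rate 0.1 is used; the original project may train it differently.
import numpy as np
import cntk as C

net = Net()
learner = C.sgd(net.pred.parameters, C.learning_parameter_schedule(0.1))
trainer = C.Trainer(net.pred, (net.loss, net.loss), [learner])

# one gradient step on a single (x, y) pair
x_batch = np.array([[1.0]], dtype=np.float32)
y_batch = np.array([[2.0]], dtype=np.float32)
trainer.train_minibatch({net.X: x_batch, net.y: y_batch})
print(trainer.previous_minibatch_loss_average)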
def create_model(self):
    hidden_layers = [8, 8, 8, 8, 8, 8, 8, 8, 8]
    first_input = C.ops.splice(self._input, self._target)
    i_size = first_input.shape
    first_input = C.ops.reshape(first_input, (i_size[0], 1, i_size[1]))

    model = C.layers.Convolution2D((1, 3), num_filters=8, pad=True,
                                   reduction_rank=1,
                                   activation=C.ops.tanh)(first_input)
    print(model)
    for i, h in enumerate(hidden_layers):
        input_new = C.ops.splice(model, first_input, axis=0)
        model = C.layers.Convolution2D((1, 3), num_filters=h, pad=True,
                                       reduction_rank=1,
                                       activation=C.ops.tanh,
                                       name='c_{}'.format(h))(input_new)
        print(model)
        model = C.layers.BatchNormalization()(model)
        model = C.layers.Dropout(0.1)(model)

    model = C.ops.splice(model, self._target)

    direction = C.layers.Sequential([
        C.layers.Recurrence(C.layers.LSTM(720)),
        C.layers.Dense(360, activation=None)
    ])(model)
    velocity = C.layers.Sequential([
        C.layers.Recurrence(C.layers.LSTM(128)),
        C.layers.Dense(64, activation=C.ops.tanh),
        C.layers.Dense(1, activation=None)
    ])(model)
    model = C.ops.splice(direction, velocity)

    if self._load_model:
        model = C.load_model(self._file_name)
        direction = model[0:360]
        velocity = model[360]

    C.logging.log_number_of_parameters(model)
    print(model)

    #loss = C.cross_entropy_with_softmax(direction, self._output) + C.squared_error(velocity, self._output_velocity) + C.ops.relu(1 - C.ops.log(C.reduce_min(self._input) + 1 - self._safe_dist))
    loss = C.cross_entropy_with_softmax(direction, self._output) + C.squared_error(velocity, self._output_velocity)
    error = C.classification_error(direction, self._output) + C.squared_error(velocity, self._output_velocity)

    learner = C.adadelta(model.parameters, l2_regularization_weight=0.001)
    progress_printer = C.logging.ProgressPrinter(tag='Training', freq=20)
    trainer = C.Trainer(model, (loss, error), learner, progress_printer)
    return model, loss, learner, trainer
def _build_network(self, pretrained_policy):
    self.image_frame = C.input_variable((1, ) + self.observation_space_shape)
    self.next_image_frame = C.input_variable((1, ) +
                                             self.observation_space_shape)
    self.advantage = C.input_variable((1, ))
    self.action_index = C.input_variable((1, ))
    self.target_value = C.input_variable((1, ))

    one_hot_action = C.one_hot(self.action_index, self.num_actions)

    if pretrained_policy is None:
        h = C.layers.Convolution2D(filter_shape=(7, 7),
                                   num_filters=32,
                                   strides=(4, 4),
                                   pad=True,
                                   name='conv_1',
                                   activation=C.relu)
        h = C.layers.Convolution2D(filter_shape=(5, 5),
                                   num_filters=64,
                                   strides=(2, 2),
                                   pad=True,
                                   name='conv_2',
                                   activation=C.relu)(h)
        h = C.layers.Convolution2D(filter_shape=(3, 3),
                                   num_filters=128,
                                   strides=(1, 1),
                                   pad=True,
                                   name='conv_3',
                                   activation=C.relu)(h)
        h = C.layers.Dense(64, activation=C.relu, name='dense_1')(h)
        self.probabilities = C.layers.Dense(self.num_actions,
                                            name='dense_2',
                                            activation=C.softmax)(h(
                                                self.image_frame))
        v = C.layers.Dense(1, name='dense_3')(h)
        self.value = v(self.image_frame)
        self.next_value = v(self.next_image_frame)
        self.output = C.combine(
            [self.probabilities, self.value, self.next_value])
    else:
        [self.probabilities, self.value, self.next_value] = list(
            C.Function.load(pretrained_policy)(self.image_frame,
                                               self.next_image_frame))
        self.values_output = C.combine([self.value, self.next_value])

    selected_action_probablity = C.ops.times_transpose(
        self.probabilities, one_hot_action)
    self.log_probability = C.ops.log(selected_action_probablity)
    self.actor_loss = -self.advantage * self.log_probability
    self.critic_loss = C.squared_error(self.target_value, self.value)
    self.loss = 0.5 * self.actor_loss + 0.5 * self.critic_loss
def test_clone_freeze():
    inputs = 3
    outputs = 5

    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)
    weights = C.parameter((inputs, outputs))
    const_weights = C.constant(weights.value)
    z = C.times(features, weights)
    c = C.times(features, const_weights)
    z_clone = z.clone('freeze')
    c_clone = c.clone('freeze')

    # check that z and z_clone are the same
    for p, q in zip(z.parameters, z_clone.constants):
        assert np.array_equal(p.value, q.value)

    # check that c and c_clone are the same
    for p, q in zip(c.constants, c_clone.constants):
        assert np.array_equal(p.value, q.value)

    # keep copies of the old values
    z_copies = [q.value for q in z_clone.constants]
    c_copies = [q.value for q in c_clone.constants]

    # update z
    trainer = C.Trainer(
        z, C.squared_error(z, label),
        C.sgd(z.parameters, C.learning_parameter_schedule(1.0)))
    x = np.random.randn(16, 3).astype('f')
    y = np.random.randn(16, 5).astype('f')
    trainer.train_minibatch({features: x, label: y})

    # update c
    for cc in c.constants:
        cc.value = np.random.randn(*cc.value.shape).astype('f')

    # check that z changed
    for p, q in zip(z.parameters, z_clone.constants):
        assert not np.array_equal(p.value, q.value)

    # check that z_clone did not change
    for p, q in zip(z_copies, z_clone.constants):
        assert np.array_equal(p, q.value)

    # check that c changed
    for p, q in zip(c.constants, c_clone.constants):
        assert not np.array_equal(p.value, q.value)

    # check that c_clone did not change
    for p, q in zip(c_copies, c_clone.constants):
        assert np.array_equal(p, q.value)
def test_clone_freeze():
    inputs = 3
    outputs = 5

    features = C.input_variable((inputs), np.float32)
    label = C.input_variable((outputs), np.float32)
    weights = C.parameter((inputs, outputs))
    const_weights = C.constant(weights.value)
    z = C.times(features, weights)
    c = C.times(features, const_weights)
    z_clone = z.clone('freeze')
    c_clone = c.clone('freeze')

    # check that z and z_clone are the same
    for p, q in zip(z.parameters, z_clone.constants):
        assert np.array_equal(p.value, q.value)

    # check that c and c_clone are the same
    for p, q in zip(c.constants, c_clone.constants):
        assert np.array_equal(p.value, q.value)

    # keep copies of the old values
    z_copies = [q.value for q in z_clone.constants]
    c_copies = [q.value for q in c_clone.constants]

    # update z
    trainer = C.Trainer(z, C.squared_error(z, label),
                        C.sgd(z.parameters,
                              C.learning_rate_schedule(1.0, C.UnitType.minibatch)))
    x = np.random.randn(16, 3).astype('f')
    y = np.random.randn(16, 5).astype('f')
    trainer.train_minibatch({features: x, label: y})

    # update c
    for cc in c.constants:
        cc.value = np.random.randn(*cc.value.shape).astype('f')

    # check that z changed
    for p, q in zip(z.parameters, z_clone.constants):
        assert not np.array_equal(p.value, q.value)

    # check that z_clone did not change
    for p, q in zip(z_copies, z_clone.constants):
        assert np.array_equal(p, q.value)

    # check that c changed
    for p, q in zip(c.constants, c_clone.constants):
        assert not np.array_equal(p.value, q.value)

    # check that c_clone did not change
    for p, q in zip(c_copies, c_clone.constants):
        assert np.array_equal(p, q.value)
def test_RAdam():
    beta2 = 0.999
    a = C.input_variable(shape=(1,))
    c = Dense(shape=(1,))(a)

    z = adam_exponential_warmup_schedule(1, beta2)
    loss = C.squared_error(a, c)

    adam = RAdam(c.parameters, 1, 0.912, beta2=beta2, epoch_size=3)
    trainer = C.Trainer(c, (loss, ), [adam])

    n = np.random.random((3, 1))
    for i in range(10_000):
        assert z[i] == adam.learning_rate()
        # print(f"iter: {i}, lr: {adam.learning_rate()}")
        trainer.train_minibatch({a: n})
def build_trainer(self):
    # Set the learning rate, and the momentum parameters for the Adam optimizer.
    lr = learning_rate_schedule(self.lr, UnitType.minibatch)
    beta1 = momentum_schedule(0.9)
    beta2 = momentum_schedule(0.99)

    # Calculate the losses.
    loss_on_v = cntk.squared_error(self.R, self.v)

    pi_a_s = cntk.log(cntk.times_transpose(self.pi, self.action))
    loss_on_pi = cntk.variables.Constant(-1) * (cntk.plus(
        cntk.times(pi_a_s, cntk.minus(self.R, self.v_calc)),
        0.01 * cntk.times_transpose(self.pi, cntk.log(self.pi))))
    #loss_on_pi = cntk.times(pi_a_s, cntk.minus(self.R, self.v_calc))

    self.tensorboard_v_writer = TensorBoardProgressWriter(
        freq=10, log_dir="tensorboard_v_logs", model=self.v)
    self.tensorboard_pi_writer = TensorBoardProgressWriter(
        freq=10, log_dir="tensorboard_pi_logs", model=self.pi)
    # tensorboard --logdir=tensorboard_pi_logs  http://localhost:6006/
    # tensorboard --logdir=tensorboard_v_logs  http://localhost:6006/

    # Create the trainers.
    self.trainer_v = cntk.Trainer(self.v, (loss_on_v), [
        adam(self.pms_v,
             lr,
             beta1,
             variance_momentum=beta2,
             gradient_clipping_threshold_per_sample=2,
             l2_regularization_weight=0.01)
    ], self.tensorboard_v_writer)
    self.trainer_pi = cntk.Trainer(self.pi, (loss_on_pi), [
        adam(self.pms_pi,
             lr,
             beta1,
             variance_momentum=beta2,
             gradient_clipping_threshold_per_sample=2,
             l2_regularization_weight=0.01)
    ], self.tensorboard_pi_writer)
def config_parameter(self, hidden_layers_dim, learning_rate, minibatch_size,
                     num_train_samples_per_sweep, num_test_samples,
                     result_file_name):
    self.input_dim = 3
    self.num_label_classes = 3
    self.input = cntk.input_variable(self.input_dim)
    self.label = cntk.input_variable(self.num_label_classes)
    self.hidden_layers_dim = hidden_layers_dim
    self.z = self.create_model(self.input, self.hidden_layers_dim)
    self.loss = cntk.squared_error(self.z, self.label)

    self.learning_rate = [0.0002] * 5000000 + [0.00002] * 4000000 + [0.00001] * 2000000 + [0.000005] * 848824
    #self.learning_rate = learning_rate
    self.lr_schedule = cntk.learning_rate_schedule(self.learning_rate,
                                                   cntk.UnitType.minibatch)
    self.learner = cntk.sgd(self.z.parameters, self.lr_schedule)
    self.trainer = cntk.Trainer(self.z, (self.loss, self.loss),
                                [self.learner])

    self.minibatch_size = minibatch_size
    self.num_train_samples_per_sweep = num_train_samples_per_sweep
    num_sweeps_to_train_with = 10
    self.num_minibatches_to_train = (
        self.num_train_samples_per_sweep *
        num_sweeps_to_train_with) / self.minibatch_size
    self.num_test_samples = num_test_samples
    self.file_name = result_file_name  # Name of result file

    self.reader_train = self.create_reader(self.train_file, True,
                                           self.input_dim,
                                           self.num_label_classes,
                                           self.num_train_samples_per_sweep)
    self.input_map = {
        self.label: self.reader_train.streams.labels,
        self.input: self.reader_train.streams.features
    }
def train_mse_cntk(x, y, model, train_gen, val_gen, epochs, val_steps):
    loss_function = cntk.squared_error(model, y)
    accuracy_function = loss_function

    learner = cntk.adam(model.parameters,
                        cntk.learning_parameter_schedule_per_sample(0.001),
                        cntk.learning_parameter_schedule_per_sample(0.9))
    trainer = cntk.Trainer(model, (loss_function, accuracy_function),
                           [learner])
    evaluator = cntk.Evaluator(accuracy_function)

    history = fit_generator(x,
                            y,
                            model=model,
                            trainer=trainer,
                            evaluator=evaluator,
                            train_gen=train_gen,
                            steps_per_epoch=500,
                            epochs=epochs,
                            val_gen=val_gen,
                            validation_steps=val_steps)

    plot_results(history)
def _build_network(self, pretrained_policy):
    self.image_frame = C.input_variable((1, ) + self.observation_space_shape)
    self.next_image_frame = C.input_variable((1, ) +
                                             self.observation_space_shape)
    self.reward = C.input_variable((1, ))

    if pretrained_policy is None:
        h = C.layers.Convolution2D(filter_shape=(7, 7),
                                   num_filters=32,
                                   strides=(4, 4),
                                   pad=True,
                                   name='conv_1',
                                   activation=C.relu)
        h = C.layers.Convolution2D(filter_shape=(5, 5),
                                   num_filters=64,
                                   strides=(2, 2),
                                   pad=True,
                                   name='conv_2',
                                   activation=C.relu)(h)
        h = C.layers.Convolution2D(filter_shape=(3, 3),
                                   num_filters=128,
                                   strides=(1, 1),
                                   pad=True,
                                   name='conv_3',
                                   activation=C.relu)(h)
        h = C.layers.Dense(64, activation=C.relu, name='dense_1')(h)
        v = C.layers.Dense(1, name='dense_2')(h)
        self.value = v(self.image_frame)
        self.next_value = v(self.next_image_frame)
        self.output = C.combine([self.value, self.next_value])
    else:
        self.output = C.Function.load(pretrained_policy)(
            self.image_frame, self.next_image_frame)
        [self.value, self.next_value] = self.output[
            self.value.output], self.output[self.next_value.output]

    target = DISCOUNT_FACTOR * self.next_value + self.reward
    self.loss = C.squared_error(target, self.value)
def main():
    # We keep up to 14 inputs from a day
    TIMESTEPS = int(input("TIMESTEPS: "))

    # 20000 is the maximum total output in our dataset. We normalize all values with
    # this so our inputs are between 0.0 and 1.0 range.
    NORMALIZE = int(input("NORMALIZE: "))

    # process batches of 10 days
    BATCH_SIZE = int(input("BATCH_SIZE: "))
    BATCH_SIZE_TEST = int(input("BATCH_SIZE_TEST: "))

    # Specify the internal-state dimensions of the LSTM cell
    H_DIMS = int(input("H_DIMS: "))

    data_source = input("Source(1=solar,2=local,3=sin,4=my): ")
    if data_source == "1" or data_source == "":
        X, Y = get_solar_old(TIMESTEPS, NORMALIZE)
    elif data_source == "2":
        X, Y = get_solar(TIMESTEPS, NORMALIZE)
    elif data_source == "3":
        X, Y = get_sin(5, 5, input("Data length: "))
    else:
        X, Y = get_my_data(H_DIMS, H_DIMS)

    epochs = input("Epochs: ")
    if epochs == "":
        EPOCHS = 100
    else:
        EPOCHS = int(epochs)

    start_time = time.time()

    # input sequences
    x = C.sequence.input_variable(1)

    model_file = "{}_epochs.model".format(EPOCHS)

    if not os.path.exists(model_file):
        print("Training model {}...".format(model_file))

        # create the model
        z = create_model(x, H_DIMS)

        # expected output (label), also the dynamic axes of the model output
        # is specified as the model of the label input
        var_l = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")

        # the learning rate
        learning_rate = 0.005
        lr_schedule = C.learning_parameter_schedule(learning_rate)

        # loss function
        loss = C.squared_error(z, var_l)

        # use squared error to determine error for now
        error = C.squared_error(z, var_l)

        # use adam optimizer
        momentum_schedule = C.momentum_schedule(0.9, minibatch_size=BATCH_SIZE)
        learner = C.fsadagrad(z.parameters,
                              lr=lr_schedule,
                              momentum=momentum_schedule)
        trainer = C.Trainer(z, (loss, error), [learner])

        # training
        loss_summary = []

        start = time.time()
        for epoch in range(0, EPOCHS):
            for x_batch, l_batch in next_batch(X, Y, "train", BATCH_SIZE):
                trainer.train_minibatch({x: x_batch, var_l: l_batch})

            if epoch % (EPOCHS / 10) == 0:
                training_loss = trainer.previous_minibatch_loss_average
                loss_summary.append(training_loss)
                print("epoch: {}, loss: {:.4f}".format(epoch, training_loss))

        print("Training took {:.1f} sec".format(time.time() - start))

        # Print the train, validation and test errors
        for labeltxt in ["train", "val", "test"]:
            print("mse for {}: {:.6f}".format(
                labeltxt, get_mse(trainer, x, X, Y, BATCH_SIZE, var_l, labeltxt)))

        z.save(model_file)
    else:
        z = C.load_model(model_file)
        x = cntk.logging.find_all_with_name(z, "")[-1]

        # Print out all layers in the model
        print("Loading {} and printing all nodes:".format(model_file))
        node_outputs = cntk.logging.find_all_with_name(z, "")
        for n in node_outputs:
            print("  {}".format(n))

    # predict
    # f, a = plt.subplots(2, 1, figsize=(12, 8))
    for j, ds in enumerate(["val", "test"]):
        fig = plt.figure()
        a = fig.add_subplot(2, 1, 1)
        results = []
        for x_batch, y_batch in next_batch(X, Y, ds, BATCH_SIZE_TEST):
            pred = z.eval({x: x_batch})
            results.extend(pred[:, 0])
        # because we normalized the input data we need to multiply the prediction
        # with SCALER to get the real values.
        a.plot((Y[ds] * NORMALIZE).flatten(), label=ds + " raw")
        a.plot(np.array(results) * NORMALIZE, label=ds + " pred")
        a.legend()

        fig.savefig("{}_chart_{}_epochs.jpg".format(ds, EPOCHS))

    print("Delta: ", time.time() - start_time)
def main():
    print("version", C.__version__)

    bs = 1
    n_chans = 1

    sigma_s = 16
    sigma_r = 12

    # 4x4x1024x1024
    # 4x12x64x64

    sz = 256
    # sz = 1024
    small_sz = sz // sigma_s

    yy, xx = np.meshgrid(np.arange(0, sz), np.arange(0, sz))
    cc, bb = np.meshgrid(np.arange(0, n_chans), np.arange(0, bs))

    xx = np.expand_dims(xx, 0)
    xx = np.expand_dims(xx, 0)
    yy = np.expand_dims(yy, 0)
    yy = np.expand_dims(yy, 0)

    bb = np.expand_dims(bb, 2)
    bb = np.expand_dims(bb, 3)
    cc = np.expand_dims(cc, 2)
    cc = np.expand_dims(cc, 3)

    # Compute graph
    grid = C.Parameter([bs, n_chans, sigma_r, small_sz, small_sz], )
    # grid = C.input_variable(
    #     [bs, n_chans, sigma_r, small_sz, small_sz],
    #     dynamic_axes=[], needs_gradient=True)
    guide = C.input_variable([bs, sz, sz], dynamic_axes=[], needs_gradient=True)
    guide_non_diff = C.input_variable([bs, sz, sz], dynamic_axes=[])

    # Coordinates
    xx = C.Constant(xx, xx.shape)
    yy = C.Constant(yy, yy.shape)
    cc = C.Constant(cc, cc.shape)
    bb = C.Constant(bb, bb.shape)

    gx_d, gy_d, gz_d, fx_d, fy_d, fz_d, _, _, _ = grid_coord(
        guide, xx, yy, sz, small_sz, sigma_r, bs)

    # Trilerp weights
    wx = (gx_d - 0.5 - fx_d)
    wy = (gy_d - 0.5 - fy_d)
    wz = C.abs(gz_d - 0.5 - fz_d)

    # Enclosing cell
    gx, gy, gz, fx, fy, fz, cx, cy, cz = grid_coord(guide_non_diff, xx, yy, sz,
                                                    small_sz, sigma_r, bs)

    output_components = []
    for ix, x in enumerate([fx, cx]):
        wx_ = (1 - wx) if ix == 0 else wx
        for iy, y in enumerate([fy, cy]):
            wy_ = (1 - wy) if iy == 0 else wy
            for iz, z in enumerate([fz, cz]):
                wz_ = (1 - wz) if iz == 0 else wz

                linear_idx = x + small_sz * (y + small_sz * (z + sigma_r * (cc + n_chans * bb)))

                # Flatten data for gather op
                flat_grid = C.reshape(
                    grid, [bs * small_sz * small_sz * sigma_r * n_chans])
                flat_linear_idx = C.reshape(linear_idx,
                                            [bs * n_chans * sz * sz])

                # Slice
                interp = C.gather(flat_grid, flat_linear_idx)
                interp_fsz = C.reshape(interp, [bs, n_chans, sz, sz])
                output_components.append(interp_fsz * wz_ * wx_ * wy_)

    out = sum(output_components)
    loss = C.squared_error(out, guide)

    # svg = C.logging.graph.plot(out, "/output/graph.svg")

    grid_data = np.random.uniform(size=(bs, n_chans, sigma_r, small_sz,
                                        small_sz)).astype(np.float32)

    # guide_data = np.random.uniform(
    #     size=(bs, sz, sz)).astype(np.float32)
    guide_data = skio.imread("/data/rgb.png").mean(2)[:sz, :sz].astype(
        np.float32)
    guide_data = np.expand_dims(guide_data, 0) / 255.0

    inputs = {guide: guide_data, guide_non_diff: guide_data}
def create_model(self):
    modeli = C.layers.Sequential([
        # Convolution layers
        C.layers.Convolution2D((1, 3),
                               num_filters=8,
                               pad=True,
                               reduction_rank=0,
                               activation=C.ops.tanh,
                               name='conv_a'),
        C.layers.Convolution2D((1, 3),
                               num_filters=16,
                               pad=True,
                               reduction_rank=1,
                               activation=C.ops.tanh,
                               name='conv2_a'),
        C.layers.Convolution2D((1, 3),
                               num_filters=32,
                               pad=False,
                               reduction_rank=1,
                               activation=C.ops.tanh,
                               name='conv3_a'),
        ######
        # Dense layers
        #C.layers.Dense(128, activation=C.ops.relu,name='dense1_a'),
        #C.layers.Dense(64, activation=C.ops.relu,name='dense2_a'),
        C.layers.Dense(361, activation=C.ops.relu, name='dense3_a')
    ])(self._input)

    ### target
    modelt = C.layers.Sequential(
        [C.layers.Dense(360, activation=C.ops.relu, name='dense4_a')])(self._target)

    ### concatenate both processed target and observations
    inputs = C.ops.splice(modeli, modelt)

    ### Use input to predict next hidden state, and generate
    ### next observation
    model = C.layers.Sequential([
        ######
        C.layers.Dense(720, activation=C.ops.relu, name='dense5_a'),
        # Recurrence
        C.layers.Recurrence(C.layers.LSTM(2048, init=C.glorot_uniform()),
                            name='lstm_a'),
        C.layers.Dense(1024, activation=None)
    ])(inputs)

    ######
    # Prediction
    direction = C.layers.Sequential([
        C.layers.Dense(720, activation=None, name='dense6_a'),
        C.layers.Dense(360, activation=C.ops.softmax, name='dense7_a')
    ])(model)
    velocity = C.layers.Sequential([
        C.layers.Dense(128, activation=C.ops.relu),
        C.layers.Dense(64, activation=None),
        C.layers.Dense(1, activation=None)
    ])(model)
    model = C.ops.splice(direction, velocity)

    if self._load_model:
        model = C.load_model('dnns/action_predicter_f.dnn')
        direction = model[0:360]
        velocity = model[360]

    print(model)

    loss = C.squared_error(direction, self._output) + C.squared_error(
        velocity, self._output_velocity)
    error = C.classification_error(direction, self._output) + C.squared_error(
        velocity, self._output_velocity)

    learner = C.adadelta(model.parameters, l2_regularization_weight=0.001)
    progress_printer = C.logging.ProgressPrinter(tag='Training')
    trainer = C.Trainer(model, (loss, error), learner, progress_printer)
    return model, loss, learner, trainer
#Venue part
xv = C.sequence.input_variable((1, 2316, VEC_DIM))
hv_conv = conv_model(xv)

#Event part
xe = C.sequence.input_variable((1, 2826, VEC_DIM))
he_conv = conv_model(xe)

#Ground Truth Success label
target = C.sequence.input_variable(1, np.float32)

#Predicted success label of target event
venue_model = C.cosine_distance(hv_conv, he_conv, name="simi")

#Squared loss
venue_loss = C.squared_error(target, venue_model)

#Squared error
venue_error = C.squared_error(target, venue_model)

lr_per_sample = [LEARNING_RATE]
lr_schedule = C.learners.learning_rate_schedule(lr_per_sample,
                                                C.learners.UnitType.sample,
                                                epoch_size=10)
momentum_as_time_constant = C.learners.momentum_as_time_constant_schedule(700)

# use adam optimizer
venue_learner = C.learners.adam(venue_model.parameters,
                                lr=lr_schedule,
                                momentum=momentum_as_time_constant)

trainer = C.train.Trainer(venue_model, (venue_loss, venue_error),
import cntk as C
import numpy as np
import pandas as pd

x = C.input_variable(2)
y = C.input_variable(2)

x0 = np.asarray([[2., 1.]], dtype=np.float32)
y0 = np.asarray([[4., 6.]], dtype=np.float32)

res = C.squared_error(x, y).eval({x: x0, y: y0})
print(type(res))
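# Worked check (added note, not part of the original snippet): CNTK's
# squared_error sums the squared element-wise differences, so for the single
# sample above the value is (2 - 4)**2 + (1 - 6)**2 = 4 + 25 = 29.
# `res` comes back as a NumPy array holding that per-sample value.
assert np.allclose(res, 29.0)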
# input sequences
x = C.sequence.input_variable(1)

# create the model
z = create_model(x)

# expected output (label), also the dynamic axes of the model output
# is specified as the model of the label input
l = C.input_variable(1, dynamic_axes=z.dynamic_axes, name="y")
print(l)

# the learning rate
learning_rate = 0.02
lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)

# loss function
loss = C.squared_error(z, l)

# use squared error to determine error for now
error = C.squared_error(z, l)

# use fsadagrad optimizer
momentum_time_constant = C.momentum_as_time_constant_schedule(BATCH_SIZE / -math.log(0.9))
learner = C.fsadagrad(z.parameters,
                      lr=lr_schedule,
                      momentum=momentum_time_constant,
                      unit_gain=True)
trainer = C.Trainer(z, (loss, error), [learner])

# train
###################
##### Network #####
###################

# Output is a single node with a linear operation.
input = cntk.input_variable(input_dim)
label = cntk.input_variable(num_outputs)
pred = Dense(num_outputs)(input)

##################
###### Loss ######
##################

# Defining loss function and evaluation metric
loss = cntk.squared_error(pred, label)
eval_fun = cntk.squared_error(pred, label)

######################
###### Training ######
######################

# Instantiate the trainer object to drive the model training
learning_rate = learning_rate_schedule(args.initial_learning_rate, UnitType.minibatch)
optimizer_op = sgd(pred.parameters, learning_rate)
train_op = Trainer(pred, (loss, eval_fun), [optimizer_op])

for step in range(0, args.num_iterations):
    for batch_num in range(0, num_minibatches_to_train):
        batch_features = features[(batch_num * args.batch_size):(
def content_loss(a, b):
    channels, x, y = a.shape
    return C.squared_error(a, b) / (channels * x * y)
def style_loss(a, b):
    channels, x, y = a.shape
    assert x == y
    A = gram(a)
    B = npgram(b)
    return C.squared_error(A, B) / (channels**2 * x**4)
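# Added sketch, not part of the original source: the gram/npgram helpers used
# in style_loss above are not defined in this snippet. A minimal sketch of what
# such helpers typically compute is the Gram matrix of channel-wise feature
# correlations; the names gram_sketch/npgram_sketch and the exact shapes
# assumed here are illustrative assumptions, not the author's implementation.
import numpy as np
import cntk as C

def gram_sketch(a):
    # a: CNTK tensor of shape (channels, x, y)
    channels, x, y = a.shape
    f = C.reshape(a, (channels, x * y))   # flatten the spatial dimensions
    return C.times_transpose(f, f)        # (channels, channels) correlations

def npgram_sketch(b):
    # NumPy counterpart for a fixed style target of shape (channels, x, y)
    channels, x, y = b.shape
    f = b.reshape(channels, x * y)
    return f.dot(f.T)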