def test_mnist():
    (train_x, train_y), (test_x, test_y) = mnist.load_data()
    val_x = train_x[50000:]
    val_y = train_y[50000:]
    train_x = train_x[:50000]
    train_y = train_y[:50000]

    batch_size = 200
    model = models.Sequential()
    model.add(layers.Linear(28, input_shape=(None, train_x.shape[1])))
    model.add(layers.ReLU())
    model.add(layers.Linear(10))
    model.add(layers.ReLU())
    model.add(layers.Linear(10))
    model.add(layers.Softmax())

    acc = losses.categorical_accuracy.__name__
    model.compile(losses.CrossEntropy(), optimizers.SGD(lr=0.001),
                  metrics=[losses.categorical_accuracy])
    model.summary()
    history = model.train(train_x, train_y, batch_size, epochs=32,
                          validation_data=(val_x, val_y))

    epochs = range(1, len(history["loss"]) + 1)
    plt.plot(epochs, history["loss"], 'ro', label="Training loss")
    plt.plot(epochs, history["val_loss"], 'go', label="Validation loss")
    plt.plot(epochs, history[acc], 'r', label="Training accuracy")
    plt.plot(epochs, history["val_" + acc], 'g', label="Validation accuracy")
    plt.title('Training/Validation loss/accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Loss/Accuracy')
    plt.legend()
    plt.show(block=True)
def __init__(self, settings):
    # print "building controller ... "
    self.seq_info = tensor.tensor3(dtype=dtype, name='seq_info')
    self.seq_lang = tensor.tensor3(dtype=dtype, name='seq_lang')
    self.seq_target = tensor.tensor3(dtype=dtype, name='seq_target')
    #
    self.model = models.SelGen(settings)
    #
    self.model.compute_loss(self.seq_info, self.seq_lang, self.seq_target)
    #
    assert (settings['optimizer'] == 'adam' or settings['optimizer'] == 'sgd')
    if settings['optimizer'] == 'adam':
        self.adam_optimizer = optimizers.Adam(adam_params=None)
    elif settings['optimizer'] == 'sgd':
        self.adam_optimizer = optimizers.SGD(adam_params=None)
    else:
        print("Choose an optimizer!")
    #
    self.adam_optimizer.compute_updates(self.model.params, self.model.grad_params)
    #
    print("compiling training function ... ")
    self.model_learn = theano.function(
        inputs=[self.seq_info, self.seq_lang, self.seq_target],
        outputs=self.model.cost,
        updates=self.adam_optimizer.updates)
    print("compiling dev function ... ")
    self.model_dev = theano.function(
        inputs=[self.seq_info, self.seq_lang, self.seq_target],
        outputs=self.model.cost)
    self.save_model = self.model.save_model
    self.get_model = self.model.get_model
def compile(self, optimizer=optimizers.SGD()):
    """
    Takes all of the layers and connects them together,
    preparing the network to be run.

    @optimizer - instance of the optimizer class hierarchy
    """
    # symbolically run through the entire network
    temp_x = self.inputs
    for layer in self.layers:
        temp_x = layer.feed(temp_x)

    # self.feed_forward holds the symbolic result for an output given an input
    self.feed_forward = temp_x
    self.costfn = optimizer.compile(self.feed_forward, self.outputs)
    updates = optimizer.updates(self.layers)

    # define a symbolic training iteration based on the input and output data,
    # the cost function, and the update algorithm defined in the optimizer class
    self._train = theano.function(
        inputs=[self.inputs, self.outputs],
        outputs=self.costfn,
        updates=updates,
        name="train"
    )
    self._evaluate = theano.function(
        inputs=[self.inputs, self.outputs],
        outputs=[self.costfn, self.feed_forward],  # grab the cost and the raw output for
        name="evaluate"                            # the evaluation steps
    )
def __init__(self, env_name='HalfCheetah-v1',
             policy_params=None,
             num_workers=32,
             num_deltas=320,
             deltas_used=320,
             delta_std=0.02,
             logdir=None,
             rollout_length=1000,
             step_size=0.01,
             shift='constant zero',
             params=None,
             seed=123):

    logz.configure_output_dir(logdir)
    logz.save_params(params)

    env = minitaur_gym_env.MinitaurBulletEnv()  # gym.make(env_name)

    self.timesteps = 0
    self.action_size = env.action_space.shape[0]
    self.ob_size = env.observation_space.shape[0]
    self.num_deltas = num_deltas
    self.deltas_used = deltas_used
    self.rollout_length = rollout_length
    self.step_size = step_size
    self.delta_std = delta_std
    self.logdir = logdir
    self.shift = shift
    self.params = params
    self.max_past_avg_reward = float('-inf')
    self.num_episodes_used = float('inf')

    # create shared table for storing noise
    print("Creating deltas table.")
    deltas_id = create_shared_noise.remote()
    self.deltas = SharedNoiseTable(ray.get(deltas_id), seed=seed + 3)
    print('Created deltas table.')

    # initialize workers with different random seeds
    print('Initializing workers.')
    self.num_workers = num_workers
    self.workers = [Worker.remote(seed + 7 * i,
                                  env_name=env_name,
                                  policy_params=policy_params,
                                  deltas=deltas_id,
                                  rollout_length=rollout_length,
                                  delta_std=delta_std) for i in range(num_workers)]

    # initialize policy
    if policy_params['type'] == 'linear':
        self.policy = LinearPolicy(policy_params)
        self.w_policy = self.policy.get_weights()
    else:
        raise NotImplementedError

    # initialize optimization algorithm
    self.optimizer = optimizers.SGD(self.w_policy, self.step_size)
    print("Initialization of ARS complete.")
def __init__(self, step_size=0.1):
    Base_ARS_Agent.__init__(self)  # Hmm. Maybe this shouldn't be here.

    # initialize optimization algorithm
    self.optimizer = optimizers.SGD(self.weights, step_size)
    print("Initialization of ARS complete.")
def train():
    num_class = 10
    mnist = tf.keras.datasets.mnist
    fashion = False
    if fashion:
        from keras.datasets import fashion_mnist
        mnist = fashion_mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train, x_test = x_train / 255.0, x_test / 255.0
    x_train = np.reshape(x_train, (-1, 1, 28 * 28))
    x_test = np.reshape(x_test, (-1, 1, 28 * 28))
    # x_train = np.reshape(x_train, (-1, 1, 28, 28))
    # x_test = np.reshape(x_test, (-1, 1, 28, 28))

    X = np.array(np.append(x_train, x_test, axis=0))
    Y = np.eye(num_class)[np.append(y_train, y_test)].reshape(
        -1, 1, 10)  # one-hot vectors, shape: (70000, 1, 10)

    ip = Input(input_size=(1, 784))
    # x = Conv2d(number_of_kernel=3, kernel_size=5, activation="relu")(ip)
    # x = Pool2d(kernel_size=5)(x)
    # y = Conv2d(number_of_kernel=3, kernel_size=5, activation="relu")(ip)
    # y = Pool2d(kernel_size=5)(y)
    # a = Add(weights_of_layers=[1, 3])([x, y])
    # c = Concat(axis=1)([x, y])
    # f = Flatten()(a)
    x1 = Dense(units=256, activation="sigmoid")(ip)
    # y1 = Dense(units=20, activation="sigmoid")(x1)
    # y2 = Dense(units=20, activation="sigmoid", learning_rate=1)(x1)
    # c1 = Concat(axis=1)([y1, y2])
    #
    # x2 = Dense(units=50, activation="sigmoid", learning_rate=1)(ip)
    # z1 = Dense(units=20, activation="sigmoid", learning_rate=1)(x2)
    # z2 = Dense(units=20, activation="sigmoid", learning_rate=1)(x2)
    # c2 = Concat(axis=1)([z1, z2])
    # c = Concat(axis=1)([c1, c2])
    op = Dense(units=num_class, activation="sigmoid")(x1)

    nn = NeuralNet(ip, op)
    sgd = optimizers.SGD(gamma=0.9, nesterov=False)
    adagrad = optimizers.Adagrad()
    adadelta = optimizers.Adadelta()
    rmsprop = optimizers.RMSProp(first_order_momentum=False, gamma=0)
    nn.build_model(loss="XE", optimizer=rmsprop, learning_rate=None, batch_size=100)
    nn.train(train_x=X[:60000], train_y=Y[:60000],
             test_x=X[60000:], test_y=Y[60000:], epochs=10)
def compile(self, loss, optimizer=optimizers.SGD()):
    self.optimizer = optimizer
    self.loss = loss
    curr_out_size = 0
    for layer in self.layers:
        curr_out_size = layer.compile(curr_out_size)
    self.optimizer.compile([l.weights for l in self.layers[1:]])
def fcl02():
    net = n.NeuralNetwork([
        l.InputLayer(height=28, width=28),
        l.FullyConnectedLayer(10, init_func=f.glorot_uniform, act_func=f.softmax)
    ], f.categorical_crossentropy)
    optimizer = o.SGD(0.1)
    num_epochs = 1
    batch_size = 10
    return net, optimizer, num_epochs, batch_size
def create_mlp():
    model = Mlp()
    model.add_layer(16, activation('relu'), in_dims=784)
    model.add_layer(32, activation('relu'))
    model.add_layer(64, activation('relu'))
    model.add_layer(32, activation('relu'))
    model.add_layer(16, activation('relu'))
    model.add_layer(10, activation('softmax'))
    optimizer = optim.SGD(learning_rate=1e-3, decay=1e-6, momentum=0.9)
    model.compile(loss='mean_squared_error', optimizer='sgd')
    return model
def fcl01():
    net = n.NeuralNetwork([
        l.InputLayer(height=28, width=28),
        l.FullyConnectedLayer(100, init_func=f.glorot_uniform, act_func=f.sigmoid),
        l.FullyConnectedLayer(10, init_func=f.glorot_uniform, act_func=f.sigmoid)
    ], f.quadratic)
    optimizer = o.SGD(3.0)
    num_epochs = 1
    batch_size = 100
    return net, optimizer, num_epochs, batch_size
def __init__(self, model_settings):
    print("building trainer ... ")
    self.seq_lang = tensor.ivector(name='seq_lang')
    self.seq_world = tensor.matrix(
        name='seq_world', dtype=dtype
    )  # shape -- len_path * dim_raw_world_input
    self.seq_action = tensor.ivector(name='seq_action')
    #
    self.model_settings = model_settings
    #
    self.neural_walker = models.NeuralWalker(
        model_settings=self.model_settings
    )
    self.neural_walker.compute_loss(
        self.seq_lang, self.seq_world, self.seq_action
    )
    #
    #
    assert (
        self.model_settings['optimizer'] == 'adam'
        or self.model_settings['optimizer'] == 'sgd'
    )
    if self.model_settings['optimizer'] == 'adam':
        self.optimizer = optimizers.Adam()
    else:
        self.optimizer = optimizers.SGD()
    #
    self.optimizer.compute_updates(
        self.neural_walker.params, self.neural_walker.grad_params
    )
    #
    self.model_learn = theano.function(
        inputs=[self.seq_lang, self.seq_world, self.seq_action],
        outputs=self.neural_walker.cost,
        updates=self.optimizer.updates
    )
    #
    self.model_dev = theano.function(
        inputs=[self.seq_lang, self.seq_world, self.seq_action],
        outputs=self.neural_walker.cost,
    )
    #
    self.get_model = self.neural_walker.get_model
    self.save_model = self.neural_walker.save_model
def cnn01():
    net = n.NeuralNetwork([
        l.InputLayer(height=28, width=28),
        l.ConvolutionalLayer(2, kernel_size=5, init_func=f.glorot_uniform, act_func=f.sigmoid),
        l.MaxPoolingLayer(pool_size=2),
        l.FullyConnectedLayer(height=10, init_func=f.glorot_uniform, act_func=f.softmax)
    ], f.log_likelihood)
    optimizer = o.SGD(0.1)
    num_epochs = 3
    batch_size = 10
    return net, optimizer, num_epochs, batch_size
def cnn02():
    net = n.NeuralNetwork([
        l.InputLayer(height=28, width=28),
        l.ConvolutionalLayer(2, kernel_size=5, init_func=f.glorot_uniform, act_func=f.sigmoid),
        l.MaxPoolingLayer(pool_size=3),
        l.FullyConnectedLayer(height=10, init_func=f.glorot_uniform, act_func=f.softmax)
    ], f.categorical_crossentropy)
    optimizer = o.SGD(0.1)
    num_epochs = 2
    batch_size = 8
    return net, optimizer, num_epochs, batch_size
def train(self, X, y, optimizer, epochs=1000, X_va=None, y_va=None, **kwargs):
    """
    This function implements the neural network's training routine.

    Parameters
    ----------
    X : numpy.ndarray
        the design matrix
    y : numpy.ndarray
        the target column vector
    optimizer : str
        the type of optimizer, either SGD or CGD, that has to be used
        during the training
    epochs : int
        the training routine's number of epochs (Default value = 1000)
    X_va : numpy.ndarray
        the design matrix used for the validation (Default value = None)
    y_va : numpy.ndarray
        the target column vector used for the validation (Default value = None)
    kwargs : dict
        additional parameters for the optimizers' initialization

    Returns
    -------
    """
    assert optimizer in ['SGD', 'CGD']

    if optimizer == 'SGD':
        self.optimizer = opt.SGD(self, **kwargs)
        self.optimizer.optimize(self, X, y, X_va, y_va, epochs)
    else:
        self.optimizer = opt.CGD(self, **kwargs)
        self.optimizer.optimize(self, X, y, X_va, y_va, **kwargs)
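# A minimal usage sketch for the train() routine above, not a definitive recipe:
# `net` stands for an already-constructed instance of the surrounding network
# class, and the data shapes are illustrative only. No extra kwargs are passed,
# so opt.SGD is built with its defaults.
import numpy as np

X = np.random.randn(200, 10)      # design matrix: 200 samples, 10 features
y = np.random.randn(200, 1)       # target column vector
X_va = np.random.randn(50, 10)    # validation design matrix
y_va = np.random.randn(50, 1)     # validation targets

net.train(X, y, optimizer='SGD', epochs=500, X_va=X_va, y_va=y_va)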
def run_regression():
    df = np.array(pd.read_csv('data/Dataset/Training/Features_Variant_1.csv'))

    model = Model.Model()
    model.add_layer(layers.Input(53))
    model.add_layer(layers.Dense(20, activation=af.relu))
    model.add_layer(layers.Dense(1, activation=af.sigmoid))
    model.compile(losses.mse, optimizers.SGD())

    input_set = np.array([x[:-1] for x in df])
    output_set = np.array([x[-1] for x in df]).reshape(len(input_set), 1)

    # Model.save_model(model, "test")
    # tmp = Model.load_model("test")
    # tmp.fit(input_set, output_set, 50, 50, metric_callback=regression_metric_mse)

    input_set = helpers.standard_scaler(input_set)
    output_set = helpers.standard_scaler(output_set)
    np.seterr(all="raise")
    model.fit(input_set, output_set, 50, 100, metric_callback=regression_metric_mse)
    Model.save_model(model, "SGD")
def __init__(self, env_name='HalfCheetah-v1',
             policy_params=None,
             num_workers=32,
             num_deltas=320,
             deltas_used=320,
             delta_std=0.02,
             logdir=None,
             rollout_length=1000,
             step_size=0.01,
             shift='constant zero',
             params=None,
             seed=123):

    logz.configure_output_dir(logdir)
    logz.save_params(params)

    env = gym.make(env_name)

    self.timesteps = 0
    self.action_size = env.action_space.shape[0]
    self.ob_size = env.observation_space.shape[0]
    self.num_deltas = num_deltas
    self.deltas_used = deltas_used
    self.rollout_length = rollout_length
    self.step_size = step_size
    self.delta_std = delta_std
    self.logdir = logdir
    self.shift = shift
    self.params = params
    self.env_name = env_name
    self.seed = seed

    # exp statistics
    self.max_past_avg_reward = float('-inf')
    self.num_episodes_used = float('inf')

    # create shared table for storing noise
    print("Creating deltas table.")
    # generate noise sequence of fixed size in object stores
    deltas_id = create_shared_noise.remote()
    self.deltas = SharedNoiseTable(ray.get(deltas_id), seed=seed + 3)
    print('Created deltas table.')

    # initialize workers with different random seeds
    print('Initializing workers.')
    self.num_workers = num_workers
    self.workers = [
        Worker.remote(seed + 7 * i,
                      env_name=env_name,
                      policy_params=policy_params,
                      deltas=deltas_id,
                      rollout_length=rollout_length,
                      delta_std=delta_std) for i in range(num_workers)
    ]

    # initialize policy
    if policy_params['type'] == 'linear':
        self.policy = LinearPolicy(policy_params)
        self.w_policy = self.policy.get_weights()
    else:
        raise NotImplementedError

    # initialize optimization algorithm
    self.optimizer = optimizers.SGD(self.w_policy, self.step_size)
    print("Initialization of ARS complete.")

    # params -> return tuple dataset
    self.dataset_x = []
    self.dataset_y = []
    self.batch_size = 2 * self.num_deltas  # set batch_size equal to num_directions
    self.reward_func = RewardFunction(params_dim=self.w_policy.size,
                                      hidden_dim=100,
                                      lr=1e-2,
                                      seed=self.seed)
        torchvision.datasets.MNIST('./data/', train=False, download=True,
                                   transform=torchvision.transforms.Compose([
                                       torchvision.transforms.ToTensor(),
                                   ])),
        shuffle=True, batch_size=batch_size)
    return train_loader, test_loader


if __name__ == '__main__':
    torch.random.manual_seed(1234)
    np.random.seed(1234)

    epochs = 10
    lr = 0.01
    batch_size = 32

    optimizer = optimizers.SGD(learning_rate=lr)
    criterion = loss.CrossEntropy()
    layers = [
        layers.LinearLayer(784, 512),
        layers.ReLU(),
        layers.Dropout(keep_rate=0.8),
        layers.LinearLayer(512, 512),
        layers.ReLU(),
        layers.Dropout(keep_rate=0.8),
        layers.LinearLayer(512, 10)
    ]
    model = Model(layers, optimizer, criterion)

    train_loader, test_loader = get_dataset(batch_size)
    for epoch_id in range(epochs):
        model.train()
def __init__(self, env_name='HalfCheetah-v1',
             policy_params=None,
             num_workers=32,
             num_deltas=320,
             deltas_used=320,
             delta_std=0.02,
             logdir=None,
             rollout_length=1000,
             step_size=0.01,
             shift='constant zero',
             params=None,
             seed=123):

    logz.configure_output_dir(logdir)
    logz.save_params(params)

    env = gym.make(env_name)

    self.timesteps = 0
    self.action_size = env.action_space.shape[0]
    self.ob_size = env.observation_space.shape[0]
    self.num_deltas = num_deltas
    self.deltas_used = deltas_used
    self.rollout_length = rollout_length
    self.step_size = step_size
    self.delta_std = delta_std
    self.logdir = logdir
    self.shift = shift
    self.params = params
    self.max_past_avg_reward = float('-inf')
    self.num_episodes_used = float('inf')

    # Parameters for Q Learner
    self.memory = ReplayMemory(10000)
    self.BATCH_SIZE = 128
    self.GAMMA = 0.999
    self.TARGET_UPDATE = 5

    # create shared table for storing noise
    print("Creating deltas table.")
    deltas_id = create_shared_noise.remote()
    self.deltas = SharedNoiseTable(ray.get(deltas_id), seed=seed + 3)
    print('Created deltas table.')

    # initialize workers with different random seeds
    print('Initializing workers.')
    self.num_workers = num_workers
    self.workers = [Worker.remote(seed + 7 * i,
                                  env_name=env_name,
                                  policy_params=policy_params,
                                  deltas=deltas_id,
                                  rollout_length=rollout_length,
                                  delta_std=delta_std) for i in range(num_workers)]
    print(self.workers[0])

    # initialize policy
    if policy_params['type'] == 'linear':
        self.policy = LinearPolicy(policy_params)
        self.w_policy = self.policy.get_weights()
    elif policy_params['type'] == 'bilayer':
        self.policy = BilayerPolicy(policy_params)
        self.w_policy = self.policy.get_weights()
    elif policy_params['type'] == 'bilayer_safe_explorer':
        self.policy = SafeBilayerExplorerPolicy(
            policy_params,
            trained_weights='/home/harshit/work/ARS/trained_policies/Madras-explore7/safeQ_torch119.pt')
        self.w_policy = self.policy.get_weights()
    else:
        raise NotImplementedError

    # initialize optimization algorithm
    self.optimizer = optimizers.SGD(self.w_policy, self.step_size)
    print("Initialization of ARS complete.")
def __init__(self):
    self.layers = []
    self.loss = losses.mse
    self.optimizer = optimizers.SGD()
model.add(layer.Dense(5))

w = numpy.array([[1], [9]])
w2 = numpy.array([[5, 4]])
w3 = numpy.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
text_x = numpy.random.randn(1000, 2)
text_y = numpy.dot(text_x, w)
text_y = numpy.dot(text_y, w2)
text_y = numpy.dot(text_y, w3)

model.init()
model.fit(text_x, text_y, epoch=10000, batch_num=100)
model.compile(loss="Mean_squared_error",
              optimizer=optimizers.SGD(model, speed=0.000001))
model.train()

t = ""
isfirst = True
for now in model.now_model:
    print(now.w)
    if isfirst:
        isfirst = False
        t = now.w
    else:
        t = numpy.dot(t, now.w)

print(numpy.dot(numpy.dot(w, w2), w3))
print(w)
print(t)
def __init__(self, settings): print("building controller ... ") ''' seq_time_to_current : T * size_batch -- t_i - t_i-1 seq_type_event : (T+1) * size_batch -- k_i seq_time_rep : (T+1) * size_batch * dim_time -- for each data and each time step, track the time features of event k_i time_since_start_to_end : size_batch -- time for seq num_sims_start_to_end : size_batch -- N for each seq seq_mask : T * size_batch -- 1/0 seq_sims_time_to_current : N * size_batch -- s_j - t_i seq_sims_index_in_hidden : N * size_batch -- int32 seq_sims_mask : N * size_batch -- 1/0 ''' #self.seq_time_to_end = tensor.matrix( # dtype=dtype, name='seq_time_to_end' #) self.seq_time_to_current = tensor.matrix(dtype=dtype, name='seq_time_to_current') self.seq_type_event = tensor.imatrix(name='seq_type_event') #self.seq_time_rep = tensor.tensor3( # dtype=dtype, name='seq_time_rep' #) self.seq_time_values = tensor.matrix(dtype=dtype, name='seq_time_values') # self.time_since_start_to_end = tensor.vector( dtype=dtype, name='time_since_start_to_end') self.num_sims_start_to_end = tensor.vector( dtype=dtype, name='num_sims_start_to_end') self.seq_mask = tensor.matrix(dtype=dtype, name='seq_mask') self.seq_sims_time_to_current = tensor.matrix( dtype=dtype, name='seq_sims_time_to_current') self.seq_sims_index_in_hidden = tensor.imatrix( name='seq_sims_index_in_hidden') self.seq_sims_mask = tensor.matrix(dtype=dtype, name='seq_sims_mask') # self.hawkes_ctsm = models.GeneralizedNeuralHawkesCTSM_time(settings) # self.hawkes_ctsm.compute_loss( #self.seq_time_to_end, self.seq_time_to_current, self.seq_type_event, #self.seq_time_rep, self.seq_time_values, self.time_since_start_to_end, self.num_sims_start_to_end, self.seq_mask, self.seq_sims_time_to_current, self.seq_sims_index_in_hidden, self.seq_sims_mask) # assert (settings['optimizer'] == 'adam' or settings['optimizer'] == 'sgd') if settings['optimizer'] == 'adam': self.adam_optimizer = optimizers.Adam(adam_params=None) elif settings['optimizer'] == 'sgd': self.adam_optimizer = optimizers.SGD(adam_params=None) else: print("Choose a optimizer ! ") # self.adam_optimizer.compute_updates(self.hawkes_ctsm.params, self.hawkes_ctsm.grad_params, list_constrain=[]) # in this version, no hard constraints on parameters # print("compiling training function ... ") self.model_learn = theano.function( inputs=[ #self.seq_time_to_end, self.seq_time_to_current, self.seq_type_event, #self.seq_time_rep, self.seq_time_values, self.time_since_start_to_end, self.num_sims_start_to_end, self.seq_mask, self.seq_sims_time_to_current, self.seq_sims_index_in_hidden, self.seq_sims_mask ], outputs=[ self.hawkes_ctsm.log_likelihood_seq, self.hawkes_ctsm.log_likelihood_time, self.hawkes_ctsm.log_likelihood_type, self.hawkes_ctsm.num_of_events ], updates=self.adam_optimizer.updates) print("compiling dev function ... ") self.model_dev = theano.function( inputs=[ #self.seq_time_to_end, self.seq_time_to_current, self.seq_type_event, #self.seq_time_rep, self.seq_time_values, self.time_since_start_to_end, self.num_sims_start_to_end, self.seq_mask, self.seq_sims_time_to_current, self.seq_sims_index_in_hidden, self.seq_sims_mask ], outputs=[ self.hawkes_ctsm.log_likelihood_seq, self.hawkes_ctsm.log_likelihood_time, self.hawkes_ctsm.log_likelihood_type, self.hawkes_ctsm.num_of_events ]) # #self.get_model = self.hawkes_ctsm.get_model self.save_model = self.hawkes_ctsm.save_model
def __init__(self, settings): print("building controller ... ") ''' seq_time_to_end : T * size_batch -- T - t_i seq_time_to_current : T * T * size_batch -- for each batch, it is T * T, and at each time step t, it tracks the ( t_i - t_i' ) for all t_i' < t_i seq_type_event : T * size_batch -- for each data and each time step, tracks the type of event k_i time_since_start_to_end : size_batch -- time for seq # seq_mask : T * size_batch -- 1/0 seq_mask_to_current : T * T * size_batch -- 1/0 ''' self.seq_time_to_end = tensor.matrix(dtype=dtype, name='seq_time_to_end') self.seq_time_to_current = tensor.tensor3(dtype=dtype, name='seq_time_to_current') self.seq_type_event = tensor.imatrix(name='seq_type_event') self.time_since_start_to_end = tensor.vector( dtype=dtype, name='time_since_start_to_end') self.seq_mask = tensor.matrix(dtype=dtype, name='seq_mask') self.seq_mask_to_current = tensor.tensor3(dtype=dtype, name='seq_mask_to_current') # self.seq_sims_time_to_current = tensor.tensor3( dtype=dtype, name='seq_sims_time_to_current') self.seq_sims_mask = tensor.matrix(dtype=dtype, name='seq_sims_mask') self.seq_sims_mask_to_current = tensor.tensor3( dtype=dtype, name='seq_sims_mask_to_current') # # self.hawkes_ctsm = models.HawkesCTSM(settings) # self.hawkes_ctsm.compute_loss(self.seq_time_to_end, self.seq_time_to_current, self.seq_type_event, self.time_since_start_to_end, self.seq_mask, self.seq_mask_to_current) # assert (settings['optimizer'] == 'adam' or settings['optimizer'] == 'sgd') if settings['optimizer'] == 'adam': self.adam_optimizer = optimizers.Adam(adam_params=None) elif settings['optimizer'] == 'sgd': self.adam_optimizer = optimizers.SGD(adam_params=None) else: print("Choose a optimizer ! ") # if 'learn_rate' in settings: print("learn rate is set to : ", settings['learn_rate']) self.adam_optimizer.set_learn_rate(settings['learn_rate']) # self.adam_optimizer.compute_updates(self.hawkes_ctsm.params, self.hawkes_ctsm.grad_params, list_constrain=range(3)) # print("compiling training function ... ") self.model_learn = theano.function( inputs=[ self.seq_time_to_end, self.seq_time_to_current, self.seq_type_event, self.time_since_start_to_end, self.seq_mask, self.seq_mask_to_current ], outputs=[ self.hawkes_ctsm.log_likelihood_seq, self.hawkes_ctsm.log_likelihood_time, self.hawkes_ctsm.log_likelihood_type, self.hawkes_ctsm.num_of_events ], updates=self.adam_optimizer.updates) print("compiling dev function ... ") self.model_dev = theano.function( inputs=[ self.seq_time_to_end, self.seq_time_to_current, self.seq_type_event, self.time_since_start_to_end, self.seq_mask, self.seq_mask_to_current ], outputs=[ self.hawkes_ctsm.log_likelihood_seq, self.hawkes_ctsm.log_likelihood_time, self.hawkes_ctsm.log_likelihood_type, self.hawkes_ctsm.num_of_events ]) if settings['predict_lambda']: print("compiling dev function for intensity computation ... ") self.hawkes_ctsm.compute_lambda(self.seq_type_event, self.seq_sims_time_to_current, self.seq_sims_mask, self.seq_sims_mask_to_current) self.model_dev_lambda = theano.function( inputs=[ self.seq_type_event, self.seq_sims_time_to_current, self.seq_sims_mask, self.seq_sims_mask_to_current ], outputs=[ self.hawkes_ctsm.lambda_samples, self.hawkes_ctsm.num_of_samples ]) # #self.get_model = self.hawkes_ctsm.get_model self.save_model = self.hawkes_ctsm.save_model
def __init__(self, settings): print("building controller ... ") ''' seq_time_to_current : T * size_batch -- t_i - t_i-1 seq_type_event : (T+1) * size_batch -- k_i seq_time_rep : (T+1) * size_batch * dim_time -- for each data and each time step, track the time features of event k_i time_since_start_to_end : size_batch -- time for seq num_sims_start_to_end : size_batch -- N for each seq seq_mask : T * size_batch -- 1/0 seq_sims_time_to_current : N * size_batch -- s_j - t_i seq_sims_index_in_hidden : N * size_batch -- int32 seq_sims_mask : N * size_batch -- 1/0 ''' #self.seq_time_to_end = tensor.matrix( # dtype=dtype, name='seq_time_to_end' #) self.seq_time_to_current = tensor.matrix(dtype=dtype, name='seq_time_to_current') self.seq_type_event = tensor.imatrix(name='seq_type_event') #self.seq_time_rep = tensor.tensor3( # dtype=dtype, name='seq_time_rep' #) self.seq_time_values = tensor.matrix(dtype=dtype, name='seq_time_values') # self.time_since_start_to_end = tensor.vector( dtype=dtype, name='time_since_start_to_end') self.num_sims_start_to_end = tensor.vector( dtype=dtype, name='num_sims_start_to_end') self.seq_mask = tensor.matrix(dtype=dtype, name='seq_mask') self.seq_sims_time_to_current = tensor.matrix( dtype=dtype, name='seq_sims_time_to_current') self.seq_sims_index_in_hidden = tensor.imatrix( name='seq_sims_index_in_hidden') self.seq_sims_mask = tensor.matrix(dtype=dtype, name='seq_sims_mask') self.time_diffs = tensor.vector(dtype=dtype, name='time_diffs') # # if settings['model'] == 'neuraladapttime': self.hawkes_ctsm = models.NeuralHawkesAdaptiveBaseCTSM_time( settings) list_constrain = [] elif settings['model'] == 'neuraladapttimescale': self.hawkes_ctsm = models.NeuralHawkesAdaptiveBaseCTSM_time_scale( settings) list_constrain = [0] elif settings['model'] == 'neuralreduce': self.hawkes_ctsm = models.NeuralHawkesAdaptiveBaseCTSM_time_scale_r( settings) list_constrain = [0] elif settings['model'] == 'conttime': self.hawkes_ctsm = models.NeuralHawkesCTLSTM(settings) list_constrain = [0] else: print("called wrong controller") # assert (settings['loss_type'] == 'loglikehood' or settings['loss_type'] == 'prediction') # if settings['loss_type'] == 'loglikehood': print("train with log-likelihood ... ") self.hawkes_ctsm.compute_loss( #self.seq_time_to_end, self.seq_time_to_current, self.seq_type_event, #self.seq_time_rep, self.seq_time_values, self.time_since_start_to_end, self.num_sims_start_to_end, self.seq_mask, self.seq_sims_time_to_current, self.seq_sims_index_in_hidden, self.seq_sims_mask) else: print("train with prediction ... ") #TODO: need to add switch for less memory #or faster speed #self.hawkes_ctsm.compute_prediction_loss( self.hawkes_ctsm.compute_prediction_loss_lessmem( self.seq_type_event, self.seq_time_values, self.seq_mask, self.time_diffs) # #self.hawkes_ctsm.compute_prediction( # self.seq_type_event, # self.seq_time_values, # self.seq_mask, # self.time_diffs #) # assert (settings['optimizer'] == 'adam' or settings['optimizer'] == 'sgd') if settings['optimizer'] == 'adam': self.adam_optimizer = optimizers.Adam(adam_params=None) elif settings['optimizer'] == 'sgd': self.adam_optimizer = optimizers.SGD(adam_params=None) else: print("Choose a optimizer ! 
") # if 'learn_rate' in settings: print("learn rate is set to : ", settings['learn_rate']) self.adam_optimizer.set_learn_rate(settings['learn_rate']) # self.adam_optimizer.compute_updates(self.hawkes_ctsm.params, self.hawkes_ctsm.grad_params, list_constrain=list_constrain) # in this version, no hard constraints on parameters # if settings['loss_type'] == 'loglikehood': print("optimize loglikehood ... ") print("compiling training function ... ") self.model_learn = theano.function( inputs=[ self.seq_time_to_current, self.seq_type_event, self.seq_time_values, self.time_since_start_to_end, self.num_sims_start_to_end, self.seq_mask, self.seq_sims_time_to_current, self.seq_sims_index_in_hidden, self.seq_sims_mask ], outputs=[ self.hawkes_ctsm.log_likelihood_seq, self.hawkes_ctsm.log_likelihood_time, self.hawkes_ctsm.log_likelihood_type, self.hawkes_ctsm.num_of_events ], updates=self.adam_optimizer.updates, on_unused_input='ignore') print("compiling dev function ... ") self.model_dev = theano.function( inputs=[ #self.seq_time_to_end, self.seq_time_to_current, self.seq_type_event, #self.seq_time_rep, self.seq_time_values, self.time_since_start_to_end, self.num_sims_start_to_end, self.seq_mask, self.seq_sims_time_to_current, self.seq_sims_index_in_hidden, self.seq_sims_mask ], outputs=[ self.hawkes_ctsm.log_likelihood_seq, self.hawkes_ctsm.log_likelihood_time, self.hawkes_ctsm.log_likelihood_type, self.hawkes_ctsm.num_of_events, self.hawkes_ctsm.last_hidden_t, self.hawkes_ctsm.last_cell_t, self.hawkes_ctsm.last_cell_target, self.hawkes_ctsm.last_cell, self.hawkes_ctsm.last_cell_decay, self.hawkes_ctsm.last_gate_output ], on_unused_input='ignore') if settings['predict_lambda']: print("compiling dev function for intensity computation ... ") self.hawkes_ctsm.compute_lambda(self.seq_type_event, self.seq_time_values, self.seq_sims_time_to_current, self.seq_sims_index_in_hidden, self.seq_sims_mask) self.model_dev_lambda = theano.function( inputs=[ self.seq_type_event, self.seq_time_values, self.seq_sims_time_to_current, self.seq_sims_index_in_hidden, self.seq_sims_mask ], outputs=[ self.hawkes_ctsm.lambda_samples, self.hawkes_ctsm.num_of_samples ], on_unused_input='ignore') else: print("optimize prediction ... ") print("compiling training function ... ") self.model_learn = theano.function( inputs=[ self.seq_type_event, self.seq_time_values, self.seq_mask, self.time_diffs ], outputs=[ self.hawkes_ctsm.log_likelihood_type_predict, self.hawkes_ctsm.num_of_errors, self.hawkes_ctsm.square_errors, self.hawkes_ctsm.num_of_events #self.hawkes_ctsm.abs_grad_params ], updates=self.adam_optimizer.updates, on_unused_input='ignore') print("compiling dev function ... ") self.model_dev = theano.function( inputs=[ self.seq_type_event, self.seq_time_values, self.seq_mask, self.time_diffs ], outputs=[ self.hawkes_ctsm.log_likelihood_type_predict, self.hawkes_ctsm.num_of_errors, self.hawkes_ctsm.square_errors, self.hawkes_ctsm.num_of_events #self.hawkes_ctsm.abs_grad_params # ], on_unused_input='ignore') # # self.get_model = self.hawkes_ctsm.get_model self.save_model = self.hawkes_ctsm.save_model
def __init__(self, settings): print("building controller ... ") ''' seq_time_to_end : T * size_batch -- T - t_i seq_time_to_current : T * T * size_batch -- for each batch, it is T * T, and at each time step t, it tracks the ( t_i - t_i' ) for all t_i' < t_i seq_type_event : T * size_batch -- for each data and each time step, tracks the type of event k_i time_since_start_to_end : size_batch -- time for seq num_sims_start_to_end : size_batch -- # of samples for seq # seq_mask : T * size_batch -- 1/0 seq_mask_to_current : T * T * size_batch -- 1/0 seq_sims_time_to_current : N * T * size_batch seq_sims_mask_to_current : N * T * size_batch seq_sims_mask : N * size_batch ''' #self.seq_time_to_end = tensor.matrix( # dtype=dtype, name='seq_time_to_end' #) self.seq_time_to_current = tensor.tensor3(dtype=dtype, name='seq_time_to_current') self.seq_type_event = tensor.imatrix(name='seq_type_event') self.time_since_start_to_end = tensor.vector( dtype=dtype, name='time_since_start_to_end') self.num_sims_start_to_end = tensor.vector( dtype=dtype, name='num_sims_start_to_end') self.seq_mask = tensor.matrix(dtype=dtype, name='seq_mask') self.seq_mask_to_current = tensor.tensor3(dtype=dtype, name='seq_mask_to_current') self.seq_sims_time_to_current = tensor.tensor3( dtype=dtype, name='seq_sims_time_to_current') self.seq_sims_mask_to_current = tensor.tensor3( dtype=dtype, name='seq_sims_mask_to_current') self.seq_sims_mask = tensor.matrix(dtype=dtype, name='seq_sims_mask') # if settings['model'] == 'hawkesinhib': self.hawkes_ctsm = models.HawkesInhibCTSM(settings) list_constrain = [2] elif settings['model'] == 'hawkesinhibscale': self.hawkes_ctsm = models.HawkesInhibCTSM_scale(settings) list_constrain = [0, 3] else: print("called wrong controller") # # self.hawkes_ctsm.compute_loss( #self.seq_time_to_end, self.seq_time_to_current, self.seq_type_event, self.time_since_start_to_end, self.num_sims_start_to_end, self.seq_mask, self.seq_mask_to_current, self.seq_sims_time_to_current, self.seq_sims_mask_to_current, self.seq_sims_mask) # assert (settings['optimizer'] == 'adam' or settings['optimizer'] == 'sgd') if settings['optimizer'] == 'adam': self.adam_optimizer = optimizers.Adam(adam_params=None) elif settings['optimizer'] == 'sgd': self.adam_optimizer = optimizers.SGD(adam_params=None) else: print("Choose a optimizer ! ") # self.adam_optimizer.compute_updates(self.hawkes_ctsm.params, self.hawkes_ctsm.grad_params, list_constrain=list_constrain) # print("compiling training function ... ") self.model_learn = theano.function( inputs=[ #self.seq_time_to_end, self.seq_time_to_current, self.seq_type_event, self.time_since_start_to_end, self.num_sims_start_to_end, self.seq_mask, self.seq_mask_to_current, self.seq_sims_time_to_current, self.seq_sims_mask_to_current, self.seq_sims_mask ], outputs=[ self.hawkes_ctsm.log_likelihood_seq, self.hawkes_ctsm.log_likelihood_time, self.hawkes_ctsm.log_likelihood_type, self.hawkes_ctsm.num_of_events ], updates=self.adam_optimizer.updates) print("compiling dev function ... 
") self.model_dev = theano.function( inputs=[ #self.seq_time_to_end, self.seq_time_to_current, self.seq_type_event, self.time_since_start_to_end, self.num_sims_start_to_end, self.seq_mask, self.seq_mask_to_current, self.seq_sims_time_to_current, self.seq_sims_mask_to_current, self.seq_sims_mask ], outputs=[ self.hawkes_ctsm.log_likelihood_seq, self.hawkes_ctsm.log_likelihood_time, self.hawkes_ctsm.log_likelihood_type, self.hawkes_ctsm.num_of_events ]) # #self.get_model = self.hawkes_ctsm.get_model self.save_model = self.hawkes_ctsm.save_model
train_set, val_set, test_set = pickle.load(open("mnist.pkl", "rb"),
                                           encoding='latin1')

model = model.Sequence()
model.add(layer.Dense(300, input_dim=28 * 28, activation="Relu"))
# model.add(layer.Dense(300, activation="Relu"))
model.add(layer.Dense(10))

train_y = util.to_categorical(train_set[1])
idx = numpy.random.choice(train_set[0].shape[0], 50000)
train_set = train_set[0][idx]
train_y = train_y[idx]

model.init()
model.fit(input_data=train_set, output_data=train_y, epoch=500, batch_num=10)
model.compile(optimizer=optimizers.SGD(model, 0.1), loss="Mean_squared_error")
model.train()

id = 0
rightnum = 0
for now in val_set[0]:
    # plt.imshow(numpy.reshape(now, (28, 28)))
    # plt.show()
    ans = val_set[1][id]
    res = model.predict(now)
    ansnum = numpy.argmax(res)
    if ansnum == ans:
        rightnum += 1
    else:
        print(ans, ansnum)
    id += 1
batch_size = 50
num_epochs = 100
num_classes = 2
hidden_units = 100
hidden_units2 = 10
dimensions = 2

# datasets: PeaksData, SwissRollData, GMMData
X_train, y_train, X_test, y_test = utils.get_data('PeaksData')
X_train, y_train = shuffle(X_train, y_train)

# gradient and jacobian tests
grad_test_W(X_train, y_train)
grad_test_b(X_train, y_train)
jacobian_test_W(X_train, y_train)
jacobian_test_b(X_train, y_train)
grad_test_W_whole_network(X_train, y_train)
grad_test_b_whole_network(X_train, y_train)

model = models.MyNeuralNetwork()
model.add(layers.Linear(dimensions, hidden_units))
model.add(activations.ReLU())
model.add(layers.Softmax(hidden_units, 5))
optimizer = optimizers.SGD(model.parameters, lr=0.1)
losses, train_accuracy, test_accuracy = model.fit(X_train, y_train,
                                                  X_test, y_test,
                                                  batch_size, num_epochs,
                                                  optimizer)

# plotting
utils.plot_scores(train_accuracy, test_accuracy)
IN_DIM = train_x.shape[1]
HIDDEN_DIM = 100
autoencoder = AutoEncoder(IN_DIM, HIDDEN_DIM, weight_sharing=True)

N_EPOCHS = 1000
bar = Progbar(N_EPOCHS)
BATCH_SIZE = 64
lr = 0.1
NOISE = 1.

cross_entropy = binary_cross_entropy()
mse_loss = mean_square_error()
optimizer = optim.SGD(autoencoder.parameters(), lr=0.01)

for epoch in range(N_EPOCHS):
    # number of mini-batches per epoch (ceiling division)
    steps = (train_x.shape[0] // BATCH_SIZE) if train_x.shape[0] % BATCH_SIZE == 0 \
        else (train_x.shape[0] // BATCH_SIZE) + 1
    entropy_train_loss = 0.
    mse_train_loss = 0.
    for ix in range(steps):
        # slice out the ix-th mini-batch
        batch_x = train_x[ix * BATCH_SIZE:(ix + 1) * BATCH_SIZE]
        mask = np.random.binomial(1, NOISE, batch_x.shape)
        input_x = batch_x * mask
        reconstruction = autoencoder(input_x)
        loss = cross_entropy(batch_x, reconstruction)
        entropy_train_loss += loss
        mse_train_loss += mse_loss(batch_x, reconstruction)
def __init__(self, env_name='CarEnv',
             policy_params=None,
             num_workers=32,
             num_deltas=320,
             deltas_used=320,
             delta_std=0.02,
             std_decay=0.0,
             logdir=None,
             rollout_length=1000,
             learning_rate=0.01,
             lr_decay=0.0,
             shift='constant zero',
             params=None,
             seed=123,
             seconds_per_episode=15,
             eval_rollouts=None,
             log_every=10,
             show_cam=1,
             enable_gpu=False):

    logz.configure_output_dir(logdir)
    logz.save_params(params)

    env = CarEnv()

    # Create base CNN for finding edges
    #base_model = VGG19(weights='imagenet',
    #                   include_top=False,
    #                   input_shape=(env.img_height,
    #                                env.img_width,
    #                                3
    #                                )
    #                   )

    self.timesteps = 0
    self.action_size = env.action_space.shape[0]
    self.num_deltas = num_deltas
    self.deltas_used = deltas_used
    self.rollout_length = rollout_length
    self.learning_rate = learning_rate
    self.lr_decay = lr_decay
    self.delta_std = delta_std
    self.std_decay = std_decay
    self.logdir = logdir
    self.shift = shift
    self.params = params
    self.max_past_avg_reward = float('-inf')
    self.num_episodes_used = float('inf')
    self.log_every = log_every
    self.eval_rollouts = eval_rollouts or self.num_deltas

    # create shared table for storing noise
    print("Creating deltas table.")
    deltas_id = create_shared_noise.remote()
    self.deltas = SharedNoiseTable(ray.get(deltas_id), seed=seed + 3)
    print('Created deltas table.')

    # initialize workers with different random seeds
    print('Initializing workers.')
    self.num_workers = num_workers
    self.workers = [
        Worker.remote(
            seed + 7 * i,
            env_name=env_name,
            policy_params=policy_params,
            deltas=deltas_id,
            rollout_length=rollout_length,
            delta_std=delta_std,
            show_cam=False,
            num_workers=self.num_workers,
            enable_gpu=enable_gpu
            #initial_weights=initial_weights,
            #initial_mean=initial_mean,
            #initial_std=initial_std
        ) for i in range(num_workers - show_cam)
    ]

    # Show the number of desired worker cams
    for i in range(show_cam):
        self.workers.append(
            Worker.remote(
                seed + 7 * i,
                env_name=env_name,
                policy_params=policy_params,
                deltas=deltas_id,
                rollout_length=rollout_length,
                delta_std=delta_std,
                #initial_weights=initial_weights,
                show_cam=True,
                num_workers=self.num_workers,
                enable_gpu=enable_gpu))

    # initialize policy
    if policy_params['type'] == 'linear':
        self.policy = LinearPolicy(policy_params)
        self.w_policy = self.policy.get_weights()
    else:
        raise NotImplementedError

    # initialize optimization algorithm
    self.optimizer = optimizers.SGD(self.w_policy, self.learning_rate,
                                    self.lr_decay)
    print("Initialization of ARS complete.")
y_test = convert_to_one_hot_labels(x_train, y_test, -1)

### Testing the speed of our own framework ###

# Defining the model architecture
model = containers.Sequential(layers.Linear(2, 25, with_bias=True),
                              activations.ReLU(),
                              layers.Linear(25, 25, with_bias=True),
                              activations.ReLU(),
                              layers.Linear(25, 25, with_bias=True),
                              activations.ReLU(),
                              layers.Linear(25, 2, with_bias=True),
                              activations.Tanh())
criterion = losses.LossMSE()
optimizer = optimizers.SGD(model.param(), learning_rate=0.001)


def compute_nb_errors(model, data_input, data_target):
    mini_batch_size = 100
    n_misclassified = 0
    for b in range(0, data_input.size(0), mini_batch_size):
        batch_output = model.forward(data_input.narrow(0, b, mini_batch_size))
        batch_target = data_target.narrow(0, b, mini_batch_size)
        output_class = batch_output.max(1)[1]
        target_class = batch_target.max(1)[1]
        n_misclassified += (output_class != target_class).sum()
    error = n_misclassified / data_input.size(0)
    return error
'''
PFN internship 2019 coding task
machine learning task-3
Issei NAKASONE
'''

import datasets as D
import optimizers as op
from gnn import GNN, TrainGNN
from iterator import Iterator

dirpath = '../datasets/train/'
batch_size = 128

train, test = D.get_dataset(dirpath, test_ratio=0.25)
train_iter = Iterator(train, batch_size)
test_iter = Iterator(test, batch_size)

model = GNN()
optimizer = op.SGD()
#optimizer = op.MomentumSGD()
optimizer.setup(model)
trainer = TrainGNN(optimizer, train_iter, test_iter)
trainer.start(epoch=50)