class Solution():
    """Grid-search driver: trains SolutionModel instances with SGD+momentum
    and records, per hyper-parameter combination, the average number of
    optimizer steps needed to classify the whole training set correctly.

    NOTE(review): relies on nn/optim (torch) and on GridSearch /
    SolutionModel defined elsewhere in this file.
    """

    def __init__(self):
        self.best_step = 1000
        # Candidate activation modules, selected by name through
        # activation_hidden / activation_output.
        self.activations = {
            'sigmoid': nn.Sigmoid(),
            'relu': nn.ReLU(),
            'rrelu0103': nn.RReLU(0.1, 0.3),
            'elu': nn.ELU(),
            'selu': nn.SELU(),
            'leakyrelu01': nn.LeakyReLU(0.1)
        }
        self.learning_rate = 0.003
        self.momentum = 0.8
        self.layers_number = 5
        self.hidden_size = 50
        self.activation_hidden = 'relu'
        self.activation_output = 'sigmoid'
        self.do_batch_norm = True
        # Per-key bookkeeping: sols counts finished runs for a key,
        # solsSum accumulates their step counts so an average can be printed.
        self.sols = {}
        self.solsSum = {}
        self.random = 0
        # Grids iterated by GridSearch; commented entries are ready-to-enable
        # experiment toggles.
        #self.do_batch_norm_grid = [False, True]
        self.random_grid = list(range(10))
        #self.layers_number_grid = [3, 4, 5, 6, 7, 8, 9, 10]
        #self.hidden_size_grid = [10, 20, 30, 40, 50]
        self.momentum_grid = [0.0, 0.3, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
        #self.learning_rate_grid = [0.002, 0.003, 0.004, 0.005, 0.006, 0.007, 0.008, 0.009, 0.01]
        #self.activation_hidden_grid = self.activations.keys()
        #self.activation_output_grid = self.activations.keys()
        self.grid_search = GridSearch(self)
        self.grid_search.set_enabled(False)

    def create_model(self, input_size, output_size):
        """Build a fresh model configured by this Solution instance."""
        return SolutionModel(input_size, output_size, self)

    def get_key(self):
        """Return a string uniquely identifying the current hyper-parameter
        combination (layers_number is zero-padded to keep keys aligned)."""
        return "{}_{}_{}_{}_{}_{}_{}".format(
            self.learning_rate, self.momentum, self.hidden_size,
            self.activation_hidden, self.activation_output,
            self.do_batch_norm, "{0:03d}".format(self.layers_number))

    # Return number of steps used
    def train_model(self, model, train_data, train_target, context):
        """Train until every sample is predicted correctly, time runs out,
        or the grid-search step budget is exceeded; returns the number of
        optimizer steps used (or None when this key was marked abandoned)."""
        key = self.get_key()
        if key in self.sols and self.sols[key] == -1:
            return  # key previously marked as hopeless -- skip training
        step = 0
        # Put model in train mode
        model.train()
        # Note: we need to move this out of circle, since we need to save
        # state for momentum to work
        optimizer = optim.SGD(model.parameters(), lr=self.learning_rate,
                              momentum=self.momentum)
        while True:
            time_left = context.get_timer().get_time_left()
            data = train_data
            target = train_target
            # model.parameters()...gradient set to zero
            optimizer.zero_grad()
            # evaluate model => model.forward(data)
            output = model(data)
            # if x < 0.5 predict 0 else predict 1
            predict = output.round()
            # Number of correct predictions
            correct = predict.eq(target.view_as(predict)).long().sum().item()
            # Total number of needed predictions
            total = target.view(-1).size(0)
            if correct == total or time_left < 0.1 or (self.grid_search.enabled and step > 100):
                if key not in self.sols:
                    self.sols[key] = 0
                    self.solsSum[key] = 0
                self.sols[key] += 1
                self.solsSum[key] += step
                # Once all random-seed runs for this key finished, report the
                # average step count.
                if self.sols[key] == len(self.random_grid):
                    print("{} {:.4f}".format(key, float(self.solsSum[key])/self.sols[key]))
                break
            # calculate loss (sum of squared errors)
            loss = ((output-target)**2).sum()
            # calculate derivative of model.forward() and put it in model.parameters()...gradient
            loss.backward()
            # print progress of the learning
            #self.print_stats(step, loss, correct, total)
            # update model: model.parameters() -= lr * gradient
            optimizer.step()
            step += 1
        return step

    def print_stats(self, step, loss, correct, total):
        """Log training progress every 1000 steps."""
        if step % 1000 == 0:
            print("Step = {} Prediction = {}/{} Error = {}".format(step, correct, total, loss.item()))
class Solution():
    """Grid-search driver for a second experiment: plain SGD over a wide
    catalogue of activations, including a custom piecewise-linear one.

    NOTE(review): relies on nn/optim (torch) and on GridSearch /
    SolutionModel defined elsewhere in this file.
    """

    def htanh02(self, x):
        """Hard-tanh clamping x to the range [-0.2, 0.2]."""
        return nn.Hardtanh(-0.2, 0.2)(x)

    def custom(self, x):
        """Custom activation: a sum of scaled hard-tanh segments plus a
        small linear leak term (0.2 * x)."""
        return self.htanh02(0.72 * x) + self.htanh02(0.27 * x) + self.htanh02(
            0.2 * x) + self.htanh02(0.2 * x) + self.htanh02(0.1 * x) + 0.2 * x

    def __init__(self):
        self.best_step = 1000
        # Candidate activations, selected by name through
        # activation_hidden / activation_output.
        self.activations = {
            'sigmoid': nn.Sigmoid(),
            'custom': self.custom,
            'relu': nn.ReLU(),
            'relu6': nn.ReLU6(),
            'rrelu0103': nn.RReLU(0.1, 0.3),
            'rrelu0205': nn.RReLU(0.2, 0.5),
            'htang1': nn.Hardtanh(-1, 1),
            'htang2': nn.Hardtanh(-2, 2),
            'htang3': nn.Hardtanh(-3, 3),
            'tanh': nn.Tanh(),
            'elu': nn.ELU(),
            'selu': nn.SELU(),
            'hardshrink': nn.Hardshrink(),
            'leakyrelu01': nn.LeakyReLU(0.1),
            'leakyrelu001': nn.LeakyReLU(0.01),
            'logsigmoid': nn.LogSigmoid(),
            'prelu': nn.PReLU(),
        }
        self.learning_rate = 1.0
        self.hidden_size = 11
        self.activation_hidden = 'relu'
        self.activation_output = 'sigmoid'
        # Per-key bookkeeping: sols counts runs (-1 marks an abandoned key),
        # solsSum accumulates step counts for averaging.
        self.sols = {}
        self.solsSum = {}
        self.random = 0
        self.random_grid = list(range(10))
        self.hidden_size_grid = [3, 5, 7, 11]
        self.learning_rate_grid = [0.001, 0.01, 0.1, 1.0, 10.0, 100.0]
        #self.learning_rate_grid = [1.0 + i/100.0 for i in range(10)]
        self.activation_hidden_grid = self.activations.keys()
        #self.activation_output_grid = self.activations.keys()
        self.grid_search = GridSearch(self)
        self.grid_search.set_enabled(True)

    def create_model(self, input_size, output_size):
        """Build a fresh model configured by this Solution instance."""
        return SolutionModel(input_size, output_size, self)

    # Return number of steps used
    def train_model(self, model, train_data, train_target, context):
        """Train until every sample is predicted correctly, time runs out,
        or the grid-search step budget is exceeded; returns the number of
        optimizer steps used."""
        step = 0
        # Put model in train mode
        model.train()
        # Loop-invariant: identifies the current hyper-parameter combination.
        key = "{}_{}_{}_{}".format(self.learning_rate, self.hidden_size,
                                   self.activation_hidden,
                                   self.activation_output)
        # Plain SGD (no momentum) keeps no per-step state, so it is safe --
        # and cheaper -- to create it once instead of on every iteration.
        optimizer = optim.SGD(model.parameters(), lr=self.learning_rate)
        while True:
            time_left = context.get_timer().get_time_left()
            # No more time left (or grid-search budget hit): record the
            # attempt and mark the key as abandoned (-1). Speed up search.
            if time_left < 0.1 or (self.grid_search.enabled and step > 40):
                if key not in self.sols:
                    self.sols[key] = 0
                    self.solsSum[key] = 0
                self.sols[key] += 1
                self.solsSum[key] += step
                self.sols[key] = -1
                break
            if key in self.sols and self.sols[key] == -1:
                break
            data = train_data
            target = train_target
            # model.parameters()...gradient set to zero
            optimizer.zero_grad()
            # evaluate model => model.forward(data)
            output = model(data)
            # if x < 0.5 predict 0 else predict 1
            predict = output.round()
            # Number of correct predictions
            correct = predict.eq(target.view_as(predict)).long().sum().item()
            # Total number of needed predictions
            total = target.view(-1).size(0)
            if correct == total:
                if key not in self.sols:
                    self.sols[key] = 0
                    self.solsSum[key] = 0
                self.sols[key] += 1
                self.solsSum[key] += step
                #if self.sols[key] > 1:
                #    print("Key = {} Avg = {:.2f} Ins = {}".format(key, float(self.solsSum[key])/self.sols[key], self.sols[key]))
                # Once all random-seed runs finished, report this combination.
                if self.sols[key] == len(self.random_grid):
                    #self.best_step = step
                    print(
                        "Learning rate = {} Hidden size = {} Activation hidden = {} Activation output = {} Steps = {}"
                        .format(self.learning_rate, self.hidden_size,
                                self.activation_hidden,
                                self.activation_output, step))
                    print("{:.4f}".format(
                        float(self.solsSum[key]) / self.sols[key]))
                break
            # calculate loss (sum of squared errors)
            loss = ((output - target)**2).sum()
            # calculate derivative of model.forward() and put it in model.parameters()...gradient
            loss.backward()
            # print progress of the learning
            #self.print_stats(step, loss, correct, total)
            # update model: model.parameters() -= lr * gradient
            optimizer.step()
            step += 1
        return step

    def print_stats(self, step, loss, correct, total):
        """Log training progress every 1000 steps."""
        if step % 1000 == 0:
            print("Step = {} Prediction = {}/{} Error = {}".format(
                step, correct, total, loss.item()))
class Solution():
    """Grid-search driver for a third experiment: Adam optimizer with a
    binary-cross-entropy loss.

    NOTE(review): relies on nn/optim/F (torch), sys, and on GridSearch /
    SolutionModel defined elsewhere in this file.
    """

    def __init__(self):
        self.best_step = sys.maxsize
        # Per-key bookkeeping: sols counts runs (-1 marks an abandoned key),
        # solsSum accumulates step counts for averaging.
        self.sols = {}
        self.solsSum = {}
        self.hidden_size = 50
        self.lr = 0.01
        self.activation_hidden = 'relu6'
        self.activation_output = 'sigmoid'
        # Candidate activations, selected by name through
        # activation_hidden / activation_output.
        self.activations = {
            'sigmoid': nn.Sigmoid(),
            'relu': nn.ReLU(),
            'relu6': nn.ReLU6(),
            'rrelu0103': nn.RReLU(0.1, 0.3),
            'rrelu0205': nn.RReLU(0.2, 0.5),
            'htang1': nn.Hardtanh(-1, 1),
            'htang2': nn.Hardtanh(-2, 2),
            'htang3': nn.Hardtanh(-3, 3),
            'tanh': nn.Tanh(),
            'elu': nn.ELU(),
            'selu': nn.SELU(),
            'hardshrink': nn.Hardshrink(),
            'leakyrelu01': nn.LeakyReLU(0.1),
            'leakyrelu001': nn.LeakyReLU(0.01),
            'logsigmoid': nn.LogSigmoid(),
            'prelu': nn.PReLU(),
        }
        self.hidden_size_grid = [16, 20, 26, 32, 36, 40, 45, 50, 54]
        self.lr_grid = [0.0001, 0.001, 0.005, 0.01, 0.1, 1]
        # self.lr_grid = [0.1, .5, 1, 1.5, 2, 3, 5, 10]
        # self.activation_hidden_grid = list(self.activations.keys())
        # self.activation_output_grid = list(self.activations.keys())
        self.grid_search = GridSearch(self)
        self.grid_search.set_enabled(False)

    def create_model(self, input_size, output_size):
        """Build a fresh model configured by this Solution instance."""
        return SolutionModel(input_size, output_size, self)

    # Return number of steps used
    def train_model(self, model, train_data, train_target, context):
        """Train until every sample is predicted correctly, time runs out,
        or the grid-search step budget is exceeded; returns the number of
        optimizer steps used."""
        step = 0
        # Put model in train mode
        model.train()
        criterion = F.binary_cross_entropy
        # optimizer = optim.SGD(model.parameters(), lr=model.lr, momentum=0.9)
        optimizer = optim.Adam(model.parameters(), lr=model.lr)
        # Loop-invariant: identifies the current hyper-parameter combination.
        key = "{}_{}_{}_{}".format(self.lr, self.hidden_size,
                                   self.activation_hidden,
                                   self.activation_output)
        while True:
            time_left = context.get_timer().get_time_left()
            # No more time left (or grid-search budget hit): record the
            # attempt and mark the key as abandoned (-1), stop training.
            if time_left < 0.1 or (model.solution.grid_search.enabled and step > 100):
                if key not in self.sols:
                    self.sols[key] = 0
                    self.solsSum[key] = 0
                self.sols[key] += 1
                self.solsSum[key] += step
                self.sols[key] = -1
                break
            if key in self.sols and self.sols[key] == -1:
                break
            data = train_data
            target = train_target
            # model.parameters()...gradient set to zero
            optimizer.zero_grad()
            # evaluate model => model.forward(data)
            output = model(data)
            # if x < 0.5 predict 0 else predict 1
            predict = output.round()
            # Number of correct predictions
            correct = predict.eq(target.view_as(predict)).long().sum().item()
            # Total number of needed predictions
            total = target.view(-1).size(0)
            if total == correct:
                if key not in self.sols:
                    self.sols[key] = 0
                    self.solsSum[key] = 0
                self.sols[key] += 1
                self.solsSum[key] += step
                # NOTE(review): source formatting was ambiguous here; reading
                # taken is that only best_step is guarded by step < 21 -- confirm.
                if step < 21:
                    self.best_step = step
                loss = criterion(output, target)
                self.print_stats(step, loss, correct, total, model)
                print("{:.4f}".format(float(self.solsSum[key])/self.sols[key]))
                return step
            # calculate loss
            loss = criterion(output, target)
            # calculate derivative of model.forward() and put it in model.parameters()...gradient
            loss.backward()
            # update model: model.parameters() -= lr * gradient
            optimizer.step()
            step += 1
        return step

    def print_stats(self, step, loss, correct, total, model):
        """Log the current hyper-parameters and prediction accuracy."""
        print("LR={}, HS={}, ActivHidden={}, ActivOut={}, Step = {} Prediction = {}/{} Error = {}".format(model.lr, model.hidden_size, model.activation_hidden, model.activation_output, step, correct, total, loss.item()))