class Main(App):
    """GUI application: a paint area with 'clear' and 'query' buttons.

    Drawings made in the paint widget are fed to ``self.nnet`` and the
    prediction is shown in a popup.
    """

    def build(self):
        """Create the widgets and the network, and return the root layout."""
        self._init_view()
        main_l = BoxLayout(orientation='vertical')
        self.nnet = NNet(n_input=INPUT, n_hidden=HIDDEN, n_output=OUTPUT,
                         learning_rate=LR)
        self._prepare_nnet()
        # Paint area first, button bar second (vertical layout).
        main_l.add_widget(self._paint_w)
        main_l.add_widget(self._conf_l)
        return main_l

    def _init_view(self):
        """Instantiate the paint widget, the button bar and its two buttons."""
        self._conf_l = BoxLayout(size_hint=(None, None),
                                 width=WINDOW_WIDTH,
                                 height=CONFIG_HEIGHT)
        self._paint_w = PaintWidget(size_hint=(None, None),
                                    width=WINDOW_WIDTH,
                                    height=PAINT_HEIGHT)

        self._clear_b = Button(text='clear')
        self._clear_b.bind(on_press=self.clear)
        self._query_b = Button(text='query')
        self._query_b.bind(on_press=self.query)

        self._conf_l.add_widget(self._clear_b)
        self._conf_l.add_widget(self._query_b)

    def _prepare_nnet(self):
        """Restore the network from MODEL_PATH, or train and save it.

        If restoring raises, the network is trained for EPISODES batches of
        BATCH_SIZE MNIST samples and then written to MODEL_PATH.
        """
        try:
            self.nnet.restore(MODEL_PATH)
        except Exception:
            # Deliberately not a bare ``except``: Ctrl-C / SystemExit during
            # restore must propagate instead of starting a training run.
            from tensorflow.examples.tutorials.mnist import input_data

            mnist = input_data.read_data_sets('MNIST_data/', one_hot=True)
            for episode in range(EPISODES):
                batch_input, batch_target = mnist.train.next_batch(BATCH_SIZE)
                # True on every 100th episode; presumably toggles progress
                # reporting inside NNet.train -- confirm against NNet.
                status = episode % 100 == 0
                self.nnet.train(batch_input, batch_target, status)
            self.nnet.save(MODEL_PATH)

    def clear(self, instance):
        """Button callback: wipe the paint widget's canvas."""
        self._paint_w.clear_canvas()

    def query(self, instance):
        """Button callback: classify the current drawing and show the result.

        The drawing is requested at (28, 28), reshaped to a single row of
        INPUT values and divided by 255 before being passed to the network.
        """
        predict = str(
            self.nnet.predict(
                self._paint_w.get_prepared_data(
                    (28, 28)).reshape(1, INPUT) / 255)[0])
        Popup(title='predict',
              content=Label(text=predict),
              size_hint=(None, None),
              size=(200, 200)).open()
def produce_new_version(self):
    """Return a new AlphaZero whose network was freshly trained.

    A new NNet sized from ``self.input_nodes`` / ``self.output_nodes`` is
    trained on ``self.examples`` and attached to an AlphaZero built with
    this object's encoder, exploration rate and MCTS simulation count.
    """
    trained_net = NNet(self.input_nodes, self.output_nodes)
    trained_net.train(self.examples)

    successor = AlphaZero(
        self.state_encoder,
        self.exploration_rate,
        self.number_of_mcts_simulations,
    )
    successor.net = trained_net
    return successor
def train(table: str = c.DEFAULT_TRAINING_TABLE,
          model_name: str = c.DEFAULT_MODEL_NAME,
          matches: int = 10,
          threshold: int = c.DEFAULT_THRESHOLD,
          learning_rate: float = c.DEFAULT_LEARNING_RATE,
          epochs: int = c.DEFAULT_EPOCHS,
          batch_size: int = c.DEFAULT_BATCH_SIZE,
          data_limit: int = 600000) -> None:
    """Train a candidate network on stored examples and score it.

    Two NNet objects are created for ``model_name``: a candidate configured
    with the given hyper-parameters and a reference one with defaults. The
    candidate is trained on up to ``data_limit`` rows of ``table`` (highest
    ``counter`` first), the two play ``matches`` games via ``_match_series``
    and the resulting score is handed to ``_evaluate_score`` together with
    ``model_name`` and ``threshold``.
    """
    candidate = NNet(learning_rate=learning_rate,
                     epochs=epochs,
                     batch_size=batch_size,
                     model_name=model_name)
    incumbent = NNet(model_name=model_name)

    connector = Connector()
    # NOTE(review): table and data_limit are interpolated into the SQL text;
    # callers must not pass untrusted values here.
    sql = f"SELECT * FROM {table} ORDER BY counter DESC LIMIT {data_limit};"
    frame = connector.retrieve_data(query=sql)
    training_examples = connector.df_to_examples(frame)

    candidate.train(training_examples)
    series_score = _match_series(nnet1=candidate, nnet2=incumbent,
                                 matches=matches)
    _evaluate_score(candidate, series_score, model_name, threshold)
def _train_new_net(episodes: int, new_net: NNet, rollout: bool,
                   iterations: int) -> None:
    """Collect examples from ``episodes`` episodes and train ``new_net``.

    Each example returned by ``_run_episode`` is stored followed by its
    ``_mirror_example`` counterpart. A single-line progress counter is
    written to stdout after every episode.
    """
    print('-' * 20)

    # With rollout the episode is run without a network argument;
    # otherwise new_net is supplied as ``nnet``.
    net_kwargs = {} if rollout else {'nnet': new_net}

    collected = []
    for episode_idx in range(episodes):
        for example in _run_episode(**net_kwargs, iterations=iterations):
            collected.extend((example, _mirror_example(example)))
        sys.stdout.write(f'\repisode: {episode_idx + 1}/{episodes}')
        sys.stdout.flush()

    print('')
    new_net.train(collected)
class Model:
    """Training / inference / checkpoint wrapper around an ``NNet`` on CUDA."""

    def __init__(self, game):
        """Build the network for ``game`` and move it to the GPU.

        Board dimensions and action count are read from ``game``; network
        hyper-parameters come from the module-level ``args``.
        """
        self.epoch_num = 10
        self.nnet = NNet(game, args)
        self.x, self.y = game.get_board_size()
        self.action_size = game.get_action_size()
        self.nnet.cuda()

    def train(self, examples):
        """Train the network on ``(board, policy, value)`` examples.

        Runs ``self.epoch_num`` epochs of ``len(examples) // args.batch_size``
        mini-batches, each sampled uniformly with replacement.

        Returns:
            The mean absolute summed batch loss over all batches processed.

        Raises:
            ValueError: if ``examples`` holds fewer than ``args.batch_size``
                items, so that no batch could be formed (previously this
                surfaced as a ZeroDivisionError after doing no work).
        """
        batches_per_epoch = len(examples) // args.batch_size
        if batches_per_epoch == 0:
            raise ValueError(
                "need at least {} examples to form one batch, got {}".format(
                    args.batch_size, len(examples)))

        optimizer = optim.Adam(self.nnet.parameters(),
                               lr=1e-7,
                               weight_decay=1e-7)
        average_loss = 0
        total_batch_num = 0

        for epoch in range(self.epoch_num):
            epoch_loss = 0
            for _ in range(batches_per_epoch):
                ids = np.random.randint(len(examples), size=args.batch_size)
                state, policy, v = list(zip(*[examples[i] for i in ids]))

                state = torch.Tensor(np.array(state)).contiguous().cuda()
                target_policy = torch.Tensor(
                    np.array(policy)).contiguous().cuda()
                target_v = torch.Tensor(np.array(v)).contiguous().cuda()

                # NOTE(review): the forward pass runs in eval mode and the
                # network is switched to train mode only afterwards; kept
                # as-is, but confirm this is intended.
                self.nnet.eval()
                out_policy, out_v = self.nnet(state)
                self.nnet.train()
                total_loss = self.loss(
                    target_policy, out_policy, target_v, out_v)

                # Pull the scalar off the GPU once and reuse it for both
                # running totals.
                batch_loss = abs(np.sum(total_loss.cpu().data.numpy()))
                average_loss += batch_loss
                epoch_loss += batch_loss

                # Compute gradients and take one optimizer step.
                optimizer.zero_grad()
                total_loss.sum().backward()
                optimizer.step()

                total_batch_num += 1

            print('epoch: {}, loss: {}'.format(
                epoch, epoch_loss / batches_per_epoch))

        self.nnet.eval()
        return average_loss / total_batch_num

    def predict(self, board):
        """Return ``(policy, value)`` numpy arrays for a single board.

        Args:
            board: numpy array that can be viewed as ``(1, self.x, self.y)``.
        """
        board = torch.Tensor(board.astype(np.float64))
        board = board.contiguous().cuda()
        board = board.view(1, self.x, self.y)

        self.nnet.eval()
        with torch.no_grad():
            policy, v = self.nnet(board)

        return policy.data.cpu().numpy()[0], v.data.cpu().numpy()[0]

    def save_checkpoint(self, folder='train', filename='checkpoint.pth.tar'):
        """Write the network's ``state_dict`` to ``folder/filename``.

        ``folder`` (including missing parents) is created when absent.
        """
        filepath = os.path.join(folder, filename)
        # makedirs + exist_ok handles nested folders and avoids the
        # check-then-create race of exists() followed by mkdir().
        os.makedirs(folder, exist_ok=True)
        torch.save({
            'state_dict': self.nnet.state_dict(),
        }, filepath)

    def load_checkpoint(self, folder='train', filename='checkpoint.pth.tar'):
        """Load the network's ``state_dict`` from ``folder/filename``.

        Raises:
            FileNotFoundError: if the checkpoint file does not exist.
        """
        filepath = os.path.join(folder, filename)
        if not os.path.exists(filepath):
            # Raising a bare string is itself a TypeError; use a real
            # exception so the message reaches the caller.
            raise FileNotFoundError("no model in {}".format(filepath))
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        checkpoint = torch.load(filepath)
        self.nnet.load_state_dict(checkpoint['state_dict'])

    def loss(self, targets_p, outputs_p, target_v, outputs_v):
        """Return the per-sample loss tensor.

        Sum over dim 1 of ``-targets_p * log(outputs_p)`` divided by
        ``self.action_size``, plus the squared difference between
        ``target_v`` and the flattened ``outputs_v``.
        """
        policy_term = -torch.sum(targets_p * torch.log(outputs_p), dim=1)
        value_term = (target_v - outputs_v.view(-1)) ** 2
        return policy_term / self.action_size + value_term
# vprint(0, nn, quit=True)
# Each outer iteration trains on every whole mini-batch of `images`, then
# measures accuracy on `test_images`; accuracies are printed every 10th pass.
for j in range(iterations):
    correct_cnt = 0
    for i in range(int(len(images) / batch_size)):
        batch_start = i * batch_size
        batch_end = batch_start + batch_size

        # Forward pass on the batch, followed by the debug dumps.
        prediction = nn.fire(images[batch_start:batch_end], masks)
        vprint(i, nn, suffix='a', quit=True)
        vprint(i, nn.dropout_masks[1], suffix='m', quit=True)

        # Count samples whose arg-max output matches the arg-max label.
        for k in range(batch_size):
            predicted_idx = np.argmax(prediction[k:k + 1])
            expected_idx = np.argmax(
                labels[batch_start + k:batch_start + k + 1])
            if predicted_idx == expected_idx:
                correct_cnt += 1

        nn.train(labels[batch_start:batch_end])
        vprint(i, nn, suffix='b', quit=True)

    # Test pass: one sample at a time, called without the masks argument.
    test_correct_cnt = 0
    for i in range(len(test_images)):
        prediction = nn.fire(test_images[i:i + 1])
        if np.argmax(prediction) == np.argmax(test_labels[i:i + 1]):
            test_correct_cnt += 1

    if j % 10 != 0:
        continue
    test_acc = test_correct_cnt / float(len(test_images))
    train_acc = correct_cnt / float(len(images))
    print("I:" + str(j) +
          " Test-Acc:" + str(test_acc) +
          " Train-Acc:" + str(train_acc))