class SimpleQBot(qbot.QBot):
    """A simple Q-bot

    Attributes:
        entity_iden (int): the entity we are controlling
        model (FeedforwardComplex): the model that does the evaluating
        teacher (FFTeacher): the teacher for the model
        optimizer (torch.optim.Optimizer): the optimizer for the network
        criterion (callable): the evaluator for the network
        offline (OfflineLearner): the offline learner
        encoder (Encoder): the encoder
    """

    def __init__(self, entity_iden):
        """Loads (or initializes) the model and builds the training helpers.

        Args:
            entity_iden (int): the entity we are controlling
        """
        self.entity_iden = entity_iden
        self.model = gen.init_or_load_model(_init_model, MODELFILE)
        self.teacher = FFTeacher()
        # Only parameters that require gradients are handed to the optimizer.
        self.optimizer = torch.optim.Adam(
            [p for p in self.model.parameters() if p.requires_grad], lr=0.003)
        self.criterion = torch.nn.MSELoss()
        self.encoder = _init_encoder(entity_iden)
        # The offline learner calls back into _learn for the actual update.
        self.offline = OfflineLearner(self._learn, heap_size=10)

    def __call__(self, entity_iden):
        """Retargets this bot at a different entity and rebuilds the encoder."""
        self.entity_iden = entity_iden
        self.encoder = _init_encoder(entity_iden)

    @property
    def cutoff(self):
        """The fixed cutoff used by this bot (3)."""
        return 3

    @property
    def alpha(self):
        """The fixed alpha used by this bot (0.3)."""
        return 0.3

    def evaluate(self, game_state: GameState, move: Move) -> float:
        """Returns the model's output for taking `move` in `game_state`."""
        # The teacher writes the network output into this preallocated tensor.
        result = torch.tensor([0.0], dtype=torch.float)
        self.teacher.classify(self.model,
                              self.encoder.encode(game_state, move), result)
        return float(result.item())

    def learn(self, game_state: GameState, move: Move,
              reward: float) -> None:
        """Hands the experience to the offline learner."""
        self.offline(game_state, move, reward)

    def think(self, max_time: float):
        """Lets the offline learner work; max_time is forwarded unchanged."""
        self.offline.think(max_time)

    def _learn(self, game_state: GameState, move: Move,
               reward: float) -> float:
        """Trains the model on one experience and returns abs(reward).

        NOTE(review): the returned magnitude is presumably used by
        OfflineLearner to rank experiences - confirm against OfflineLearner.
        """
        self.teacher.teach(self.model, self.optimizer, self.criterion,
                           self.encoder.encode(game_state, move),
                           torch.tensor([reward], dtype=torch.float32))
        return abs(reward)

    def save(self) -> None:
        """Saves the model to MODELFILE."""
        gen.save_model(self.model, MODELFILE)
def __init__(self, entity_iden):
    """Sets up the model, teacher, optimizer, criterion, encoder and
    offline learner for the given entity.

    Args:
        entity_iden (int): the entity we are controlling
    """
    self.entity_iden = entity_iden

    network = gen.init_or_load_model(_init_model, MODELFILE)
    self.model = network
    self.teacher = FFTeacher()

    # Adam only receives the parameters that require gradients.
    trainable = [
        param for param in network.parameters() if param.requires_grad
    ]
    self.optimizer = torch.optim.Adam(trainable, lr=0.003)
    self.criterion = torch.nn.MSELoss()

    self.encoder = _init_encoder(entity_iden)
    self.offline = OfflineLearner(self._learn, heap_size=10)
def __init__(self, entity_iden: int):
    """Builds the bot: model, empty history, training helpers and the
    logging / exploration settings.

    Args:
        entity_iden (int): forwarded to the base class constructor
    """
    super().__init__(entity_iden)

    network = _init_or_load_model()
    self.model = network
    self.history = deque()
    self.teacher = FFTeacher()

    # Adam only receives the parameters that require gradients.
    trainable = [
        param for param in network.parameters() if param.requires_grad
    ]
    self.optimizer = torch.optim.Adam(trainable, lr=0.003)
    self.criterion = torch.nn.MSELoss()

    # Console output switches.
    self.spam_loss = False
    self.spam_moves = False
    self.print_loss_improves = True

    # Exploration rate, best loss seen so far, and the save countdown.
    self.random_perc = 0.2
    self.best_loss = float('inf')
    self.next_save = 50
def offline_learning():
    """Loads the replay buffer and trains on it.

    Opens the replay buffer at REPLAY_FOLDER, loads the training network
    from MODELFILE (initializing the model files first if MODELFILE is
    missing) and trains it with a GenericTrainer. The replay buffer is
    always closed, even when training raises.
    """
    perf_file = os.path.join(SAVEDIR, 'offline_learning_perf.log')
    perf = perf_stats.LoggingPerfStats('deep1 offline learning', perf_file)

    replay = replay_buffer.FileReadableReplayBuffer(REPLAY_FOLDER, perf=perf)
    try:
        print(f'loaded {len(replay)} experiences for replay...')
        if not os.path.exists(MODELFILE):
            _init_model()

        network = Deep1ModelTrain.load(MODELFILE)
        teacher = MyTeacher(FFTeacher())

        train_pwl = MyPWL(replay, Deep1ModelEval.load(EVAL_MODELFILE), teacher)
        # The same point-with-label producer is used for testing.
        test_pwl = train_pwl

        # NOTE(review): the nesting of the two loops below was reconstructed
        # from a whitespace-mangled source - confirm against the original.
        def update_target(ctx: tnr.GenericTrainingContext, hint: str):
            """Saves the training network and installs a new target network
            built from its fc_layers."""
            ctx.logger.info('swapping target network, hint=%s', hint)
            network.save(MODELFILE, exist_ok=True)

            new_target = Deep1ModelToEval(network.fc_layers)
            for _ in range(3):
                train_pwl.mark()
                for _ in range(0, 1024, ctx.batch_size):
                    train_pwl.fill(ctx.points, ctx.labels)
                    teacher.classify_many(new_target, ctx.points, ctx.labels.unsqueeze(1))
                new_target.learning_to_current()
                train_pwl.reset()

            new_target = new_target.to_evaluative()
            new_target.save(EVAL_MODELFILE, exist_ok=True)

            train_pwl.target_model = new_target

        trainer = tnr.GenericTrainer(
            train_pwl=train_pwl,
            test_pwl=test_pwl,
            teacher=teacher,
            batch_size=32,
            learning_rate=0.0001,
            # Only parameters that require gradients are optimized.
            optimizer=torch.optim.Adam(
                [p for p in network.parameters() if p.requires_grad], lr=0.0001),
            criterion=torch.nn.MSELoss())
        (trainer.reg(tnr.EpochsTracker()).reg(tnr.EpochsStopper(100)).reg(
            tnr.InfOrNANDetecter()).reg(tnr.InfOrNANStopper()).reg(
                tnr.DecayTracker()).reg(tnr.DecayStopper(1)).reg(
                    tnr.OnEpochCaller.create_every(update_target, skip=CUTOFF))  # smaller cutoffs require more bootstrapping
         .reg(tnr.DecayOnPlateau()))
        res = trainer.train(network, target_dtype=torch.float32,
                            point_dtype=torch.float32, perf=perf)
        if res['inf_or_nan']:
            print('training failed! inf or nan!')
    finally:
        replay.close()
def __init__(self, entity_iden: int, replay_path=REPLAY_FOLDER, evaluation=False):
    """Loads the evaluation model and, unless evaluating, opens a writable
    replay buffer.

    Args:
        entity_iden (int): the entity we are controlling
        replay_path (str): where the writable replay buffer is opened
        evaluation (bool): True to skip creating the replay buffer
    """
    self.entity_iden = entity_iden

    # Make sure the model file exists before trying to load it.
    model_missing = not os.path.exists(EVAL_MODELFILE)
    if model_missing:
        _init_model()
    self.model = Deep1ModelEval.load(EVAL_MODELFILE)

    self.teacher = FFTeacher()
    self.evaluation = evaluation
    self.encoder = init_encoder(entity_iden)

    if evaluation:
        self.replay = None
    else:
        self.replay = replay_buffer.FileWritableReplayBuffer(
            replay_path, exist_ok=True)
def _run(args):
    """Gathers experiences with the deep1 runner, then plots the top two
    principal components of the evaluation network over the replay buffer.

    Args:
        args: parsed arguments; reads py3, numthreads and
            train_force_amount
    """
    if args.py3:
        executable = 'python3'
    else:
        executable = 'python'
    first_port = 1769
    nthreads = args.numthreads

    bot_folder = os.path.join('out', 'or_reinforce', 'deep', 'deep1')
    session = deep1_runner.SessionSettings(
        tie_len=111,
        tar_ticks=2000,
        train_force_amount=args.train_force_amount)
    settings = deep1_runner.TrainSettings(
        train_bot='or_reinforce.deep.deep1.deep1',
        adver_bot='optimax_rogue_bots.randombot.RandomBot',
        bot_folder=bot_folder,
        train_seq=[session],
        cur_ind=0)

    last_port = first_port + nthreads * 10
    deep1_runner._get_experiences_async(  # pylint: disable=protected-access
        settings, executable, first_port, last_port, 0, False, False,
        nthreads)

    replay = replay_buffer.FileReadableReplayBuffer(deep1.REPLAY_FOLDER)
    try:
        print(f'loaded {len(replay)} experiences for analysis...')

        network = deep1.Deep1ModelEval.load(deep1.EVAL_MODELFILE)
        teacher = deep1.MyTeacher(FFTeacher())
        # A second copy of the evaluation model is given to the producer.
        target_model = deep1.Deep1ModelEval.load(deep1.EVAL_MODELFILE)
        pwl = deep1.MyPWL(replay, target_model, teacher)

        print('--fetching top 2 pcs--')
        trajectory: pca_gen.PCTrajectoryGen = pca_gen.find_trajectory(
            network, pwl, 2)

        print('--plotting top 2 pcs--')
        plot_dir = os.path.join(SAVEDIR, 'pca')
        pca_gen.plot_trajectory(trajectory, plot_dir, exist_ok=True,
                                transparent=False, compress=False, s=16)
        print('--finished--')
    finally:
        # Always release the replay buffer, even if the analysis failed.
        replay.close()
class SimpleBot(Bot):
    """Simple pathfinding bot

    Attributes:
        history (deque[tuple[GameState, Move]]): recent (game state, move)
            pairs, where the left corresponds to len(history) ticks ago and
            the right corresponds to the last tick
        model (FeedforwardComplex): the model that predicts q-values
        teacher (FFTeacher): the teacher for the model
        optimizer (torch.optim.Optimizer): the optimizer
        criterion (callable): criterion
        spam_loss (bool): True to print the loss after every training step
        spam_moves (bool): True to print the score of every move considered
        print_loss_improves (bool): True to print when the loss hits a new low
        random_perc (float): probability of replacing the chosen move with a
            random one
        best_loss (float): lowest loss seen so far
        next_save (int): moves remaining until the next periodic save
    """

    def __init__(self, entity_iden: int):
        super().__init__(entity_iden)
        self.model = _init_or_load_model()
        self.history = deque()
        self.teacher = FFTeacher()
        # Only parameters that require gradients are handed to the optimizer.
        self.optimizer = torch.optim.Adam(
            [p for p in self.model.parameters() if p.requires_grad], lr=0.003)
        self.criterion = torch.nn.MSELoss()

        self.spam_loss = False
        self.spam_moves = False
        self.print_loss_improves = True
        self.random_perc = 0.2
        self.best_loss = float('inf')
        self.next_save = 50

    def move(self, game_state: GameState):
        """Records the state, trains once enough history exists, and returns
        the move to make (the model's best move, or a random one with
        probability random_perc)."""
        # Round-trip through the serializer to get an independent copy.
        gs_copy = ser.deserialize(ser.serialize(game_state))
        # The move is not known yet; it is filled in below.
        self.history.append((gs_copy, None))
        if len(self.history) == CUTOFF + 1:
            self.teach()

        move = self.eval(game_state)
        if np.random.uniform(0, 1) < self.random_perc:
            move = random.choice(MOVE_MAP)

        # Replace the placeholder entry with the move actually chosen.
        self.history[-1] = (gs_copy, move)

        self.next_save -= 1
        if self.next_save <= 0:
            self.save()
            self.next_save = 50
        return move

    def finished(self, game_state: GameState, result):
        """Saves the model; both arguments are unused."""
        self.save()

    def save(self):
        """saves the model"""
        print(f'[simplebot] {time.ctime()} saving')
        sys.stdout.flush()

        _save_model(self.model)

    def teach(self):
        """Must be called when we have CUTOFF+1 history.

        Takes the oldest history item, calculates the value for the finite
        series of diminished rewards, and then trains the network on that"""
        original, og_move = self.history.popleft()
        previous = original
        penalty = 1
        reward = 0
        # After the popleft exactly CUTOFF later states remain; each step's
        # reward is weighted by ALPHA raised to its distance from `original`.
        for i in range(CUTOFF):
            current = self.history[i][0]
            reward += penalty * _reward(previous, current, self.entity_iden)
            previous = current
            penalty *= ALPHA

        loss = self.teacher.teach(self.model, self.optimizer, self.criterion,
                                  _encode(original, self.entity_iden, og_move),
                                  torch.tensor([reward], dtype=torch.float32))
        if self.spam_loss:
            print(f'[simplebot] loss={loss}')
            sys.stdout.flush()
        if self.print_loss_improves and loss < self.best_loss:
            self.best_loss = loss
            print(f'[simplebot] loss improved to {loss} for move ' +
                  f'{og_move.name} reward {reward}')
            sys.stdout.flush()

    def eval(self, game_state: GameState) -> Move:
        """Chooses the best move according to our model for the given state"""
        scores = []
        # The teacher writes each network output into this reused tensor.
        out = torch.tensor([0.0])
        for move in MOVE_MAP:
            self.teacher.classify(self.model,
                                  _encode(game_state, self.entity_iden, move),
                                  out)
            scores.append(out.item())

        if self.spam_moves:
            # One "move: score" entry per move. Previously the move, the
            # colon and the score were joined as three separate items,
            # which printed "{move, : , 0.123, ...}".
            toprint = [
                f'{move!s}: {score:.3f}'
                for move, score in zip(MOVE_MAP, scores)
            ]
            print('{' + ', '.join(toprint) + '}')
            sys.stdout.flush()
        return MOVE_MAP[int(np.argmax(scores))]
def main():
    """Entry point

    Trains a FeedforwardLarge network (one hidden layer of size 50, linear
    nonlinearity, readout weights and bias not trained) on a two-cluster
    GaussianSpheresPWLP. The saturation, dtt, pca, pr and svm callbacks are
    registered for during-training output under SAVEDIR, and their
    directories are zipped on finish.
    """
    # Two clusters centred at (-1, 0) and (1, 0), labelled 0 and 1.
    pwl = GaussianSpheresPWLP(epoch_size=1000, input_dim=2, output_dim=2, clusters=[
        PointWithLabel(point=torch.tensor(
            (-1, 0), dtype=torch.double), label=0),
        PointWithLabel(point=torch.tensor(
            (1, 0), dtype=torch.double), label=1)
    ], std_dev=0.4, mean=0)

    layers = [(50, True, False)]
    # layer_names is only referenced by the commented-out plotting calls.
    layer_names = ['input', 'hidden', 'output']
    network = FeedforwardLarge.create(input_dim=2, output_dim=2,
                                      weights=wi.GaussianWeightInitializer(
                                          mean=0, vari=0.1, normalize_dim=1),
                                      biases=wi.ZerosWeightInitializer(),
                                      layer_sizes=layers,
                                      nonlinearity='linear',
                                      train_readout_weights=False,
                                      train_readout_bias=False)

    trainer = tnr.GenericTrainer(
        train_pwl=pwl,
        test_pwl=pwl,
        teacher=FFTeacher(),
        batch_size=1,
        learning_rate=0.003,
        # Only parameters that require gradients are optimized.
        optimizer=torch.optim.Adam(
            [p for p in network.parameters() if p.requires_grad], lr=0.003),
        criterion=mycrits.create_meansqerr_regul(
            noise_strength=0.5)  #torch.nn.CrossEntropyLoss()
    )

    # Module-level settings of pca3d_throughtrain.
    pca3d_throughtrain.FRAMES_PER_TRAIN = 1
    pca3d_throughtrain.SKIP_TRAINS = 0
    pca3d_throughtrain.NUM_FRAME_WORKERS = 4

    dig = npmp.NPDigestor('train_one', 5)
    #pca_3d.plot_ff(pca_ff.find_trajectory(network, pwl, 3), os.path.join(SAVEDIR, 'pca_3d_start'), True,
    #               digestor=dig, frame_time=FRAME_TIME, layer_names=layer_names)
    dtt_training_dir = os.path.join(SAVEDIR, 'dtt')
    pca_training_dir = os.path.join(SAVEDIR, 'pca')
    pr_training_dir = os.path.join(SAVEDIR, 'pr')
    svm_training_dir = os.path.join(SAVEDIR, 'svm')
    satur_training_dir = os.path.join(SAVEDIR, 'saturation')
    # Only referenced by the commented-out PCAThroughTrain registration.
    pca_throughtrain_dir = os.path.join(SAVEDIR, 'pca_throughtrain')
    # Registration order is kept exactly as written.
    (trainer.reg(tnr.EpochsTracker()).reg(tnr.EpochsStopper(10)).reg(
        tnr.DecayTracker())
     #.reg(tnr.DecayStopper(8))
     #.reg(tnr.LRMultiplicativeDecayer())
     .reg(tnr.DecayOnPlateau()).reg(tnr.AccuracyTracker(5, 1000, True))
     #.reg(tnr.WeightNoiser(
     #    wi.GaussianWeightInitializer(mean=0, vari=0.02, normalize_dim=None),
     #    lambda ctxt: ctxt.model.layers[-1].weight.data))
     .reg(
         tnr.OnEpochCaller.create_every(
             satur.during_training(satur_training_dir, True, dig), skip=10)).reg(
                 tnr.OnEpochCaller.create_every(
                     dtt.during_training_ff(dtt_training_dir, True, dig), skip=10)).reg(
                         tnr.OnEpochCaller.create_every(
                             pca_ff.during_training(pca_training_dir, True, dig, alpha=0.8), skip=10)).reg(
                                 tnr.OnEpochCaller.create_every(
                                     pr.during_training_ff(
                                         pr_training_dir, True, dig), skip=1000)).reg(
                                             tnr.OnEpochCaller.create_every(
                                                 svm.during_training_ff(
                                                     svm_training_dir, True, dig), skip=1000))
     #.reg(pca3d_throughtrain.PCAThroughTrain(pca_throughtrain_dir, layer_names, True))
     # dig.join() is registered ahead of the zip callbacks.
     .reg(tnr.OnFinishCaller(lambda *args, **kwargs: dig.join())).reg(
         tnr.ZipDirOnFinish(dtt_training_dir)).reg(
             tnr.ZipDirOnFinish(pca_training_dir)).reg(
                 tnr.ZipDirOnFinish(pr_training_dir)).reg(
                     tnr.ZipDirOnFinish(svm_training_dir)).reg(
                         tnr.ZipDirOnFinish(satur_training_dir)))
    trainer.train(network)
    #pca_3d.plot_ff(pca_ff.find_trajectory(network, pwl, 3), os.path.join(SAVEDIR, 'pca_3d_end'), True,
    #               digestor=dig, frame_time=FRAME_TIME, layer_names=layer_names)
    dig.archive_raw_inputs(os.path.join(SAVEDIR, 'raw_digestor.zip'))
def train_with_noise(vari, rep, ignoreme):  # pylint: disable=unused-argument
    """Entry point

    Trains a five-hidden-layer (90 units, tanh) FeedforwardLarge network on
    MNIST with SGD while a WeightNoiser is registered on the last layer's
    weights. Output goes under SAVEDIR/variance_{vari}/repeat_{rep}.

    Args:
        vari: variance of the Gaussian initializer given to the WeightNoiser;
            also part of the output path and the digestor name
        rep: repeat index; part of the output path and the digestor name
        ignoreme: unused
    """
    train_pwl = MNISTData.load_train().to_pwl().restrict_to(set(
        range(10))).rescale()
    test_pwl = MNISTData.load_test().to_pwl().restrict_to(set(
        range(10))).rescale()

    # (layer size, nonlinearity) for each hidden layer.
    layers_and_nonlins = (
        (90, 'tanh'),
        (90, 'tanh'),
        (90, 'tanh'),
        (90, 'tanh'),
        (90, 'tanh'),
    )

    layers = [lyr[0] for lyr in layers_and_nonlins]
    nonlins = [lyr[1] for lyr in layers_and_nonlins]
    nonlins.append('tanh')  # output
    #layer_names = [f'{lyr[1]} (layer {idx})' for idx, lyr in enumerate(layers_and_nonlins)]
    # layer_names is only referenced by commented-out registrations below.
    layer_names = [
        f'Layer {idx+1}' for idx, lyr in enumerate(layers_and_nonlins)
    ]
    layer_names.insert(0, 'Input')
    layer_names.append('Output')

    network = FeedforwardLarge.create(input_dim=train_pwl.input_dim,
                                      output_dim=train_pwl.output_dim,
                                      weights=wi.GaussianWeightInitializer(
                                          mean=0, vari=0.3, normalize_dim=0),
                                      biases=wi.ZerosWeightInitializer(),
                                      layer_sizes=layers,
                                      nonlinearity=nonlins
                                      #layer_sizes=[500, 200]
                                      )

    # Shared by the trainer and the optimizer.
    _lr = 0.1
    trainer = tnr.GenericTrainer(
        train_pwl=train_pwl,
        test_pwl=test_pwl,
        teacher=FFTeacher(),
        batch_size=30,
        learning_rate=_lr,
        optimizer=torch.optim.SGD(
            [p for p in network.parameters() if p.requires_grad], lr=_lr
        ),  #torch.optim.Adam([p for p in network.parameters() if p.requires_grad], lr=0.003),
        criterion=mycrits.meansqerr  #torch.nn.CrossEntropyLoss()#
    )

    #pca3d_throughtrain.FRAMES_PER_TRAIN = 4
    #pca3d_throughtrain.SKIP_TRAINS = 0
    #pca3d_throughtrain.NUM_FRAME_WORKERS = 6

    dig = npmp.NPDigestor(f'TRMCN_{rep}_{vari}', 8)

    savedir = os.path.join(SAVEDIR, f'variance_{vari}', f'repeat_{rep}')

    dtt_training_dir = os.path.join(savedir, 'dtt')
    pca_training_dir = os.path.join(savedir, 'pca')
    pca3d_training_dir = os.path.join(savedir, 'pca3d')
    pr_training_dir = os.path.join(savedir, 'pr')
    svm_training_dir = os.path.join(savedir, 'svm')
    satur_training_dir = os.path.join(savedir, 'saturation')
    trained_net_dir = os.path.join(savedir, 'trained_model')
    # Only referenced by the commented-out PCAThroughTrain registration.
    pca_throughtrain_dir = os.path.join(savedir,
                                        'pca_throughtrain')
    logpath = os.path.join(savedir, 'log.txt')
    # Registration order is kept exactly as written.
    (trainer.reg(tnr.EpochsTracker()).reg(tnr.EpochsStopper(0.2)).reg(
        tnr.DecayTracker()).reg(tnr.DecayStopper(5)).reg(
            tnr.LRMultiplicativeDecayer())
     #.reg(tnr.DecayOnPlateau())
     #.reg(tnr.DecayEvery(5))
     # The noiser gets the last layer's weights, the style 'scale', and a
     # callable that builds a new initializer with half the variance.
     .reg(tnr.AccuracyTracker(1, 1000, True)).reg(
         tnr.WeightNoiser(
             wi.GaussianWeightInitializer(mean=0, vari=vari),
             (lambda ctx: ctx.model.layers[-1].weight.data.detach()),
             'scale',
             (lambda noise: wi.GaussianWeightInitializer(0, noise.vari * 0.5)
              )))
     #.reg(tnr.OnEpochCaller.create_every(dtt.during_training_ff(dtt_training_dir, True, dig), skip=100))
     #.reg(tnr.OnEpochCaller.create_every(pca_3d.during_training(pca3d_training_dir, True, dig, plot_kwargs={'layer_names': layer_names}), start=500, skip=100))
     #.reg(tnr.OnEpochCaller.create_every(pca_ff.during_training(pca_training_dir, True, dig), skip=100))
     .reg(
         tnr.OnEpochCaller.create_every(pr.during_training_ff(
             pr_training_dir, True, dig),
                                        skip=1))
     #.reg(tnr.OnEpochCaller.create_every(svm.during_training_ff(svm_training_dir, True, dig), skip=100))
     #.reg(tnr.OnEpochCaller.create_every(satur.during_training(satur_training_dir, True, dig), skip=100))
     .reg(
         tnr.OnEpochCaller.create_every(tnr.save_model(trained_net_dir),
                                        skip=100))
     #.reg(pca3d_throughtrain.PCAThroughTrain(pca_throughtrain_dir, layer_names, True))
     .reg(tnr.OnFinishCaller(lambda *args, **kwargs: dig.join())).reg(
         tnr.CopyLogOnFinish(logpath)).reg(
             tnr.ZipDirOnFinish(dtt_training_dir)).reg(
                 tnr.ZipDirOnFinish(pca_training_dir)).reg(
                     tnr.ZipDirOnFinish(pca3d_training_dir)).reg(
                         tnr.ZipDirOnFinish(pr_training_dir)).reg(
                             tnr.ZipDirOnFinish(svm_training_dir)).reg(
                                 tnr.ZipDirOnFinish(satur_training_dir)).reg(
                                     tnr.ZipDirOnFinish(trained_net_dir)))

    trainer.train(network)
    dig.archive_raw_inputs(os.path.join(savedir, 'digestor_raw.zip'))
def main():
    """Entry point

    Trains a FeedforwardComplex network (linear 90 -> isrlu -> linear
    OUTPUT_DIM) on a ten-cluster GaussianSpheresPWLP with Adam and
    cross-entropy loss, then saves the network's state dict to
    SAVEDIR/trained_network.pt.
    """
    pwl = GaussianSpheresPWLP.create(epoch_size=2700,
                                     input_dim=INPUT_DIM,
                                     output_dim=OUTPUT_DIM,
                                     cube_half_side_len=2,
                                     num_clusters=10,
                                     std_dev=0.04,
                                     mean=0,
                                     min_sep=0.1)

    nets = cu.FluentShape(INPUT_DIM).verbose()
    network = FeedforwardComplex(INPUT_DIM, OUTPUT_DIM, [
        nets.linear_(90),
        nets.nonlin('isrlu'),
        nets.linear_(OUTPUT_DIM),
    ])

    trainer = tnr.GenericTrainer(
        train_pwl=pwl,
        test_pwl=pwl,
        teacher=FFTeacher(),
        batch_size=45,
        learning_rate=0.001,
        # Only parameters that require gradients are optimized.
        optimizer=torch.optim.Adam(
            [p for p in network.parameters() if p.requires_grad], lr=0.001),
        criterion=torch.nn.CrossEntropyLoss())

    # NOTE(review): dig is created but not passed to any callback in this
    # function - confirm whether constructing it is needed.
    dig = npmp.NPDigestor('train_one_complex', 16)
    #pca_3d.plot_ff(pca_ff.find_trajectory(network, pwl, 3), os.path.join(SAVEDIR, 'pca_3d_start'), True, dig3d)
    #dig3d.join()
    #exit()
    dtt_training_dir = os.path.join(SAVEDIR, 'dtt')
    pca_training_dir = os.path.join(SAVEDIR, 'pca')
    pr_training_dir = os.path.join(SAVEDIR, 'pr')
    svm_training_dir = os.path.join(SAVEDIR, 'svm')
    satur_training_dir = os.path.join(SAVEDIR, 'saturation')
    # Registration order is kept exactly as written. The pca directory is
    # still registered for zipping although its callback is commented out.
    (trainer.reg(tnr.EpochsTracker()).reg(tnr.EpochsStopper(150)).reg(
        tnr.DecayTracker()).reg(tnr.DecayStopper(3)).reg(
            tnr.LRMultiplicativeDecayer()).reg(tnr.DecayOnPlateau()).reg(
                tnr.AccuracyTracker(5, 1000, True)).reg(
                    tnr.OnEpochCaller.create_every(dtt.during_training_ff(
                        dtt_training_dir, True),
                                                   skip=1000))
     #.reg(tnr.OnEpochCaller.create_every(pca_ff.during_training(pca_training_dir, True), skip=1000))
     .reg(
         tnr.OnEpochCaller.create_every(
             pr.during_training_ff(pr_training_dir, True), skip=1000)).reg(
                 tnr.OnEpochCaller.create_every(
                     svm.during_training_ff(svm_training_dir, True),
                     skip=1000)).reg(
                         tnr.OnEpochCaller.create_every(satur.during_training(
                             satur_training_dir, True),
                                                        skip=1000)).
     reg(tnr.ZipDirOnFinish(dtt_training_dir)).reg(
         tnr.ZipDirOnFinish(pca_training_dir)).reg(
             tnr.ZipDirOnFinish(pr_training_dir)).reg(
                 tnr.ZipDirOnFinish(svm_training_dir)).reg(
                     tnr.ZipDirOnFinish(satur_training_dir)))
    trainer.train(network)
    torch.save(network.state_dict(),
               os.path.join(SAVEDIR, 'trained_network.pt'))
def main():
    """Entry point

    Trains a FeedforwardLarge network (one hidden layer of 100 tanh units,
    tanh output) on a ten-cluster GaussianSpheresPWLP with Adam and
    mycrits.meansqerr. Only the pca_ff during-training callback is active;
    the other analysis callbacks are commented out.
    """
    pwl = GaussianSpheresPWLP.create(epoch_size=2700,
                                     input_dim=INPUT_DIM,
                                     output_dim=OUTPUT_DIM,
                                     cube_half_side_len=2,
                                     num_clusters=10,
                                     std_dev=0.5,
                                     mean=0,
                                     min_sep=1,
                                     force_split=True)

    # (layer size, nonlinearity) for each hidden layer.
    layers_and_nonlins = (
        (100, 'tanh'),
        #(100, 'linear'),
        #(25, 'linear'),
        #(90, 'tanh'),
        #(90, 'tanh'),
        #(90, 'linear'),
        #(25, 'linear'),
    )

    layers = [lyr[0] for lyr in layers_and_nonlins]
    nonlins = [lyr[1] for lyr in layers_and_nonlins]
    nonlins.append('tanh')  # output
    # layer_names is only referenced by commented-out calls below.
    layer_names = [
        f'{lyr[1]} ({idx})' for idx, lyr in enumerate(layers_and_nonlins)
    ]
    layer_names.insert(0, 'input')
    layer_names.append('output')

    network = FeedforwardLarge.create(input_dim=INPUT_DIM,
                                      output_dim=OUTPUT_DIM,
                                      weights=wi.GaussianWeightInitializer(
                                          mean=0, vari=0.3, normalize_dim=1),
                                      biases=wi.ZerosWeightInitializer(),
                                      layer_sizes=layers,
                                      nonlinearity=nonlins)

    trainer = tnr.GenericTrainer(
        train_pwl=pwl,
        test_pwl=pwl,
        teacher=FFTeacher(),
        batch_size=20,
        learning_rate=0.001,
        # Only parameters that require gradients are optimized.
        optimizer=torch.optim.Adam(
            [p for p in network.parameters() if p.requires_grad], lr=0.001),
        criterion=mycrits.meansqerr  #torch.nn.CrossEntropyLoss()
    )

    # Module-level settings of pca3d_throughtrain.
    pca3d_throughtrain.FRAMES_PER_TRAIN = 1
    pca3d_throughtrain.SKIP_TRAINS = 4
    pca3d_throughtrain.NUM_FRAME_WORKERS = 6

    dig = npmp.NPDigestor('train_one', 35)
    #pca_3d.plot_ff(pca_ff.find_trajectory(network, pwl, 3), os.path.join(SAVEDIR, 'pca_3d_start'), True,
    #               digestor=dig, frame_time=FRAME_TIME, layer_names=layer_names)
    dtt_training_dir = os.path.join(SAVEDIR, 'dtt')
    pca_training_dir = os.path.join(SAVEDIR, 'pca')
    pr_training_dir = os.path.join(SAVEDIR, 'pr')
    svm_training_dir = os.path.join(SAVEDIR, 'svm')
    satur_training_dir = os.path.join(SAVEDIR, 'saturation')
    # Only referenced by the commented-out PCAThroughTrain registration.
    pca_throughtrain_dir = os.path.join(SAVEDIR, 'pca_throughtrain')
    # Registration order is kept exactly as written.
    (trainer.reg(tnr.EpochsTracker()).reg(tnr.EpochsStopper(100)).reg(
        tnr.InfOrNANDetecter()).reg(tnr.DecayTracker()).reg(
            tnr.DecayStopper(8)).reg(tnr.LRMultiplicativeDecayer()).reg(
                tnr.DecayOnPlateau()).reg(tnr.AccuracyTracker(5, 1000, True))
     #.reg(tnr.WeightNoiser(
     #    wi.GaussianWeightInitializer(mean=0, vari=0.02, normalize_dim=None),
     #    lambda ctxt: ctxt.model.layers[-1].weight.data))
     #.reg(tnr.OnEpochCaller.create_every(satur.during_training(satur_training_dir, True, dig), skip=1000))
     #.reg(tnr.OnEpochCaller.create_every(dtt.during_training_ff(dtt_training_dir, True, dig), skip=1000))
     .reg(
         tnr.OnEpochCaller.create_every(pca_ff.during_training(
             pca_training_dir, True, dig),
                                        skip=1000))
     #.reg(tnr.OnEpochCaller.create_every(pr.during_training_ff(pr_training_dir, True, dig), skip=1000))
     #.reg(tnr.OnEpochCaller.create_every(svm.during_training_ff(svm_training_dir, True, dig), skip=1000))
     #.reg(pca3d_throughtrain.PCAThroughTrain(pca_throughtrain_dir, layer_names, True))
     # dig.join() is registered ahead of the zip callbacks.
     .reg(tnr.OnFinishCaller(lambda *args, **kwargs: dig.join())).reg(
         tnr.ZipDirOnFinish(dtt_training_dir)).reg(
             tnr.ZipDirOnFinish(pca_training_dir)).reg(
                 tnr.ZipDirOnFinish(pr_training_dir)).reg(
                     tnr.ZipDirOnFinish(svm_training_dir)).reg(
                         tnr.ZipDirOnFinish(satur_training_dir)))
    trainer.train(network)
    #pca_3d.plot_ff(pca_ff.find_trajectory(network, pwl, 3), os.path.join(SAVEDIR, 'pca_3d_end'), True,
    #               digestor=dig, frame_time=FRAME_TIME, layer_names=layer_names)
    dig.archive_raw_inputs(os.path.join(SAVEDIR, 'raw_digestor.zip'))
def train_with_noise(vari, rep, pr_repeats, ignoreme):  # pylint: disable=unused-argument
    """Entry point

    Trains a four-hidden-layer leaky-relu FeedforwardComplex network on a
    ten-cluster GaussianSpheresPWLP with SGD and mycrits.hubererr, with
    WeightNoisers registered on the selected layers. The output directory
    SAVEDIR/variance_{vari}/repeat_{rep} is deleted before the run, and
    again if training reports inf_or_nan.

    Args:
        vari: variance of the Gaussian initializer given to each WeightNoiser
        rep: repeat index; the pr callback is only registered when
            rep < pr_repeats
        pr_repeats: number of leading repeats that get the pr callback
        ignoreme: unused
    """
    train_pwl = GaussianSpheresPWLP.create(epoch_size=30000,
                                           input_dim=INPUT_DIM,
                                           output_dim=2,
                                           cube_half_side_len=2,
                                           num_clusters=10,
                                           std_dev=0.2,
                                           mean=0,
                                           min_sep=0.4,
                                           force_split=True)
    # The same producer is used for testing.
    test_pwl = train_pwl
    nets = cu.FluentShape(INPUT_DIM).verbose()
    # Combined initializer shared by every hidden linear layer.
    mywi = wi.WICombine([
        wi.RectangularEyeWeightInitializer(1),
        wi.GaussianWeightInitializer(mean=0, vari=0.3)
    ])
    network = FeedforwardComplex(INPUT_DIM, train_pwl.output_dim, [
        nets.linear_(DIM, weights_init=mywi),
        nets.nonlin('leakyrelu'),
        nets.linear_(DIM, weights_init=mywi),
        nets.nonlin('leakyrelu'),
        nets.linear_(DIM, weights_init=mywi),
        nets.nonlin('leakyrelu'),
        nets.linear_(DIM, weights_init=mywi),
        nets.nonlin('leakyrelu'),
        nets.linear_(train_pwl.output_dim),
        nets.nonlin('leakyrelu'),
    ])

    # Shared by the trainer and the optimizer.
    _lr = 0.01
    trainer = tnr.GenericTrainer(
        train_pwl=train_pwl,
        test_pwl=test_pwl,
        teacher=FFTeacher(),
        batch_size=20,
        learning_rate=_lr,
        optimizer=torch.optim.SGD(
            [p for p in network.parameters() if p.requires_grad], lr=_lr),
        criterion=mycrits.hubererr  #torch.nn.CrossEntropyLoss()#
    )

    #pca3d_throughtrain.FRAMES_PER_TRAIN = 4
    #pca3d_throughtrain.SKIP_TRAINS = 0
    #pca3d_throughtrain.NUM_FRAME_WORKERS = 6

    dig = npmp.NPDigestor(f'TRMCN_{rep}_{vari}', 4)

    savedir = os.path.join(SAVEDIR, f'variance_{vari}', f'repeat_{rep}')

    # Start from a clean output directory.
    shared.filetools.deldir(savedir)

    dtt_training_dir = os.path.join(savedir, 'dtt')
    pca_training_dir = os.path.join(savedir, 'pca')
    pca3d_training_dir = os.path.join(savedir, 'pca3d')
    pr_training_dir = os.path.join(savedir, 'pr')
    svm_training_dir = os.path.join(savedir, 'svm')
    satur_training_dir = os.path.join(savedir, 'saturation')
    trained_net_dir = os.path.join(savedir, 'trained_model')
    # Only referenced by the commented-out PCAThroughTrain registration.
    pca_throughtrain_dir = os.path.join(savedir, 'pca_throughtrain')
    acts_training_dir = os.path.join(savedir, 'acts')
    logpath = os.path.join(savedir, 'log.txt')
    # Registration order is kept exactly as written.
    (trainer.reg(tnr.EpochsTracker()).reg(tnr.EpochsStopper(300)).reg(
        tnr.EpochProgress(5, hint_end_epoch=10)).reg(tnr.DecayTracker()).reg(
            tnr.DecayStopper(10)).reg(tnr.InfOrNANDetecter()).reg(
                tnr.InfOrNANStopper()).reg(
                    tnr.LRMultiplicativeDecayer(factor=0.9))
     #.reg(tnr.DecayOnPlateau(verbose=False))
     .reg(tnr.DecayEvery(1, verbose=False)).reg(
         tnr.AccuracyTracker(1,
                             1000,
                             True,
                             savepath=os.path.join(savedir,
                                                   'accuracy.json'))))

    # Select which layer indices get a WeightNoiser.
    if ALL_LAYERS_NOISED:
        tonoise = list(range(1, len(network.layers)))
    else:
        tonoise = [len(network.layers) - 2]

    noisestyle = 'add'

    def layer_fetcher(lyr):
        """Returns a callable that fetches the detached weights of layer
        lyr; the factory binds lyr per call instead of late-binding the
        loop variable."""
        return lambda ctx: ctx.model.layers[lyr].action.weight.data.detach()

    # Builds a new initializer with 0.9 times the previous noise variance.
    noisedecayer = lambda noise: wi.GaussianWeightInitializer(
        0, noise.vari * 0.9)

    for lyr in tonoise:
        # Layers whose is_module is falsy are skipped.
        if network.layers[lyr].is_module:
            trainer.reg(
                tnr.WeightNoiser(
                    wi.GaussianWeightInitializer(mean=0, vari=vari),
                    layer_fetcher(lyr), noisestyle, noisedecayer))

    # NOTE(review): assumed to sit outside the loop above; the source's
    # indentation was lost - confirm.
    if rep < pr_repeats:
        trainer.reg(
            tnr.OnEpochCaller.create_every(pr.during_training_ff(
                pr_training_dir, True, dig),
                                           skip=100))

    (trainer
     #.reg(tnr.OnEpochCaller.create_every(dtt.during_training_ff(dtt_training_dir, True, dig), skip=100))
     #.reg(tnr.OnEpochCaller.create_every(pca_3d.during_training(pca3d_training_dir, True, dig, plot_kwargs={'layer_names': layer_names}), start=500, skip=100))
     #.reg(tnr.OnEpochCaller.create_every(pca_ff.during_training(pca_training_dir, True, dig), skip=100))
     #.reg(tnr.OnEpochCaller.create_every(pr.during_training_ff(pr_training_dir, True, dig), skip=100))
     #.reg(tnr.OnEpochCaller.create_every(svm.during_training_ff(svm_training_dir, True, dig), skip=100))
     #.reg(tnr.OnEpochCaller.create_every(satur.during_training(satur_training_dir, True, dig), skip=100))
     .reg(tnr.OnEpochCaller.create_every(measacts.during_training(acts_training_dir, dig, meta={'time': time.time(), 'noised_layers': tonoise, 'variance': vari, 'repeat': rep}), skip=100))
     .reg(tnr.OnEpochCaller.create_every(tnr.save_model(trained_net_dir), skip=100))
     #.reg(pca3d_throughtrain.PCAThroughTrain(pca_throughtrain_dir, layer_names, True))
     .reg(tnr.OnFinishCaller(lambda *args, **kwargs: dig.join()))
     .reg(tnr.CopyLogOnFinish(logpath))
     .reg(tnr.ZipDirOnFinish(dtt_training_dir))
     .reg(tnr.ZipDirOnFinish(pca_training_dir))
     .reg(tnr.ZipDirOnFinish(pca3d_training_dir))
     .reg(tnr.ZipDirOnFinish(pr_training_dir))
     .reg(tnr.ZipDirOnFinish(svm_training_dir))
     .reg(tnr.ZipDirOnFinish(satur_training_dir))
     .reg(tnr.ZipDirOnFinish(trained_net_dir))
    )

    result = trainer.train(network)
    dig.archive_raw_inputs(os.path.join(savedir, 'digestor_raw.zip'))

    if result['inf_or_nan']:
        # NOTE(review): the message says the run is repeated, but nothing
        # visible here re-runs it; this only deletes the output. The deldir
        # is assumed to belong to this branch - confirm.
        print('[TMCN] Inf or NAN detected - repeating run')
        shared.filetools.deldir(savedir)
def main():
    """Entry point

    Trains a one-hidden-layer tanh FeedforwardComplex network on MNIST with
    Adam and mycrits.meansqerr. A WeightNoiser and a Binned2Norm callback
    are registered on the first layer's weights, alongside the dtt, pca, pr,
    svm, saturation and save-model callbacks.
    """
    nets = cu.FluentShape(28 * 28).verbose()
    network = FeedforwardComplex(INPUT_DIM, OUTPUT_DIM, [
        nets.linear_(HIDDEN_DIM),
        nets.tanh(),
        nets.linear_(OUTPUT_DIM),
        nets.tanh()
    ])

    train_pwl = MNISTData.load_train().to_pwl().restrict_to(set(
        range(10))).rescale()
    test_pwl = MNISTData.load_test().to_pwl().restrict_to(set(
        range(10))).rescale()

    # layer_names is only referenced by commented-out registrations below.
    layer_names = ('Input', 'Hidden', 'Output')

    trainer = tnr.GenericTrainer(
        train_pwl=train_pwl,
        test_pwl=test_pwl,
        teacher=FFTeacher(),
        batch_size=45,
        learning_rate=0.001,
        # Only parameters that require gradients are optimized.
        optimizer=torch.optim.Adam(
            [p for p in network.parameters() if p.requires_grad], lr=0.001),
        criterion=mycrits.meansqerr  #torch.nn.CrossEntropyLoss()
    )

    dig = npmp.NPDigestor('train_one_complex', 35)

    dtt_training_dir = os.path.join(SAVEDIR, 'dtt')
    pca_training_dir = os.path.join(SAVEDIR, 'pca')
    pca3d_training_dir = os.path.join(SAVEDIR, 'pca3d')
    pr_training_dir = os.path.join(SAVEDIR, 'pr')
    svm_training_dir = os.path.join(SAVEDIR, 'svm')
    satur_training_dir = os.path.join(SAVEDIR, 'saturation')
    trained_net_dir = os.path.join(SAVEDIR, 'trained_model')
    # Only referenced by the commented-out PCAThroughTrain registration.
    pca_throughtrain_dir = os.path.join(SAVEDIR, 'pca_throughtrain')
    wds_training_dir = os.path.join(SAVEDIR, 'weightdeltas')
    logpath = os.path.join(SAVEDIR, 'log.txt')
    # Registration order is kept exactly as written.
    (trainer.reg(tnr.EpochsTracker()).reg(tnr.EpochsStopper(3)).reg(
        tnr.DecayTracker()).reg(tnr.DecayStopper(8)).reg(
            tnr.LRMultiplicativeDecayer()).reg(tnr.DecayOnPlateau()).
     # The noiser gets the first layer's weights, the style 'scale', and a
     # callable that builds a new initializer with half the variance.
     reg(tnr.AccuracyTracker(5, 1000, True)).reg(
         tnr.WeightNoiser(
             wi.GaussianWeightInitializer(mean=0, vari=0.1),
             (lambda ctx: ctx.model.layers[0].action.weight.data.detach()),
             'scale',
             (lambda noise: wi.GaussianWeightInitializer(0, noise.vari * 0.5)
              ))).reg(
                  tnr.OnEpochCaller.create_every(dtt.during_training_ff(
                      dtt_training_dir, True, dig),
                                                 skip=100))
     #.reg(tnr.OnEpochCaller.create_every(pca_3d.during_training(pca3d_training_dir, True, dig, plot_kwargs={'layer_names': layer_names}), start=1000, skip=1000))
     .reg(
         tnr.OnEpochCaller.create_every(
             pca_ff.during_training(pca_training_dir, True, dig),
             skip=100)).reg(
                 tnr.OnEpochCaller.create_every(
                     pr.during_training_ff(pr_training_dir, True, dig),
                     skip=100)).reg(
                         tnr.OnEpochCaller.create_every(
                             svm.during_training_ff(svm_training_dir, True, dig),
                             skip=100)).reg(
                                 tnr.OnEpochCaller.create_every(
                                     satur.during_training(
                                         satur_training_dir, True, dig),
                                     skip=100)).reg(
                                         tnr.OnEpochCaller.create_every(
                                             tnr.save_model(trained_net_dir),
                                             skip=100)).
     reg(
         wds.Binned2Norm(
             (lambda ctx: ctx.model.layers[0].action.weight.data.detach()),
             dig, wds_training_dir, 'Induced Changes in $W^{(1)}$'))
     #.reg(pca3d_throughtrain.PCAThroughTrain(pca_throughtrain_dir, layer_names, True, layer_indices=plot_layers))
     .reg(tnr.OnFinishCaller(lambda *args, **kwargs: dig.join())).reg(
         tnr.CopyLogOnFinish(logpath)).reg(
             tnr.ZipDirOnFinish(dtt_training_dir)).reg(
                 tnr.ZipDirOnFinish(pca_training_dir)).reg(
                     tnr.ZipDirOnFinish(pca3d_training_dir)).reg(
                         tnr.ZipDirOnFinish(pr_training_dir)).reg(
                             tnr.ZipDirOnFinish(svm_training_dir)).reg(
                                 tnr.ZipDirOnFinish(satur_training_dir)).reg(
                                     tnr.ZipDirOnFinish(trained_net_dir)))

    trainer.train(network)
    dig.archive_raw_inputs(os.path.join(SAVEDIR, 'digestor_raw.zip'))
def main():
    """Entry point

    Trains an eight-layer fully-connected FeedforwardComplex network on
    CIFAR with Adam and cross-entropy loss. The dtt, pca-3d, pca, pr, svm,
    saturation and save-model callbacks are registered during training, and
    their directories are zipped on finish.

    Note: this overwrites the module-level default linear initializers in
    `cu` before building the network.
    """
    cu.DEFAULT_LINEAR_BIAS_INIT = wi.ZerosWeightInitializer()
    cu.DEFAULT_LINEAR_WEIGHT_INIT = wi.GaussianWeightInitializer(
        mean=0, vari=0.3, normalize_dim=0)

    nets = cu.FluentShape(32 * 32 * 3).verbose()
    network = FeedforwardComplex(INPUT_DIM, OUTPUT_DIM, [
        nets.linear_(32 * 32 * 6),
        nets.nonlin('isrlu'),
        nets.linear_(500),
        nets.nonlin('tanh'),
        nets.linear_(250),
        nets.nonlin('tanh'),
        nets.linear_(250),
        nets.nonlin('tanh'),
        nets.linear_(100),
        nets.tanh(),
        nets.linear_(100),
        nets.tanh(),
        nets.linear_(100),
        nets.tanh(),
        nets.linear_(OUTPUT_DIM),
        nets.nonlin('isrlu'),
    ])

    train_pwl = CIFARData.load_train().to_pwl().restrict_to(set(
        range(10))).rescale()
    test_pwl = CIFARData.load_test().to_pwl().restrict_to(set(
        range(10))).rescale()

    # One label per layer, in network order. The first 250-unit layer uses
    # tanh; its label previously read "(tang)".
    layer_names = ('input', 'FC -> 32*32*6 (ISRLU)', 'FC -> 500 (tanh)',
                   'FC -> 250 (tanh)', 'FC -> 250 (tanh)', 'FC -> 100 (tanh)',
                   'FC -> 100 (tanh)', 'FC -> 100 (tanh)',
                   f'FC -> {OUTPUT_DIM} (ISRLU)')
    # Only referenced by the commented-out PCAThroughTrain registration.
    plot_layers = tuple(i for i in range(2, len(layer_names) - 1))
    trainer = tnr.GenericTrainer(
        train_pwl=train_pwl,
        test_pwl=test_pwl,
        teacher=FFTeacher(),
        batch_size=45,
        learning_rate=0.001,
        # Only parameters that require gradients are optimized.
        optimizer=torch.optim.Adam(
            [p for p in network.parameters() if p.requires_grad], lr=0.001),
        criterion=torch.nn.CrossEntropyLoss())

    # Module-level settings of pca3d_throughtrain.
    pca3d_throughtrain.FRAMES_PER_TRAIN = 1
    pca3d_throughtrain.SKIP_TRAINS = 16
    pca3d_throughtrain.NUM_FRAME_WORKERS = 1

    dig = npmp.NPDigestor('train_one_complex', 5)

    dtt_training_dir = os.path.join(SAVEDIR, 'dtt')
    pca_training_dir = os.path.join(SAVEDIR, 'pca')
    pca3d_training_dir = os.path.join(SAVEDIR, 'pca3d')
    pr_training_dir = os.path.join(SAVEDIR, 'pr')
    svm_training_dir = os.path.join(SAVEDIR, 'svm')
    satur_training_dir = os.path.join(SAVEDIR, 'saturation')
    trained_net_dir = os.path.join(SAVEDIR, 'trained_model')
    # Only referenced by the commented-out PCAThroughTrain registration.
    pca_throughtrain_dir = os.path.join(SAVEDIR, 'pca_throughtrain')
    logpath = os.path.join(SAVEDIR, 'log.txt')
    # Registration order is kept exactly as written.
    (trainer
     .reg(tnr.EpochsTracker())
     .reg(tnr.EpochsStopper(STOP_EPOCH))
     .reg(tnr.DecayTracker())
     .reg(tnr.DecayStopper(8))
     .reg(tnr.EpochProgress(print_every=120, hint_end_epoch=STOP_EPOCH))
     .reg(tnr.LRMultiplicativeDecayer())
     .reg(tnr.DecayOnPlateau(patience=3))
     .reg(tnr.AccuracyTracker(1, 1000, True))
     .reg(tnr.OnEpochCaller.create_every(
         dtt.during_training_ff(dtt_training_dir, True, dig), skip=5))
     .reg(tnr.OnEpochCaller.create_every(
         pca_3d.during_training(
             pca3d_training_dir, True, dig,
             plot_kwargs={'layer_names': layer_names}),
         start=10, skip=100))
     .reg(tnr.OnEpochCaller.create_every(
         pca_ff.during_training(pca_training_dir, True, dig), skip=5))
     .reg(tnr.OnEpochCaller.create_every(
         pr.during_training_ff(pr_training_dir, True, dig, labels=False),
         skip=5))
     .reg(tnr.OnEpochCaller.create_every(
         svm.during_training_ff(svm_training_dir, True, dig), skip=5))
     .reg(tnr.OnEpochCaller.create_every(
         satur.during_training(satur_training_dir, True, dig), skip=5))
     .reg(tnr.OnEpochCaller.create_every(
         tnr.save_model(trained_net_dir), skip=5))
     #.reg(pca3d_throughtrain.PCAThroughTrain(pca_throughtrain_dir, layer_names, True, layer_indices=plot_layers))
     # dig.join() is registered ahead of the zip callbacks.
     .reg(tnr.OnFinishCaller(lambda *args, **kwargs: dig.join()))
     .reg(tnr.ZipDirOnFinish(dtt_training_dir))
     .reg(tnr.ZipDirOnFinish(pca_training_dir))
     .reg(tnr.ZipDirOnFinish(pca3d_training_dir))
     .reg(tnr.ZipDirOnFinish(pr_training_dir))
     .reg(tnr.ZipDirOnFinish(svm_training_dir))
     .reg(tnr.ZipDirOnFinish(satur_training_dir))
     .reg(tnr.ZipDirOnFinish(trained_net_dir))
     .reg(tnr.CopyLogOnFinish(logpath)))

    trainer.train(network)
    dig.archive_raw_inputs(os.path.join(SAVEDIR, 'digestor_raw.zip'))
class DeepQBot(qbot.QBot):
    """Q-bot that scores moves with a stored model and logs experiences.

    Attributes:
        entity_iden (int): identifier of the entity this bot controls
        model: the network loaded via Deep1ModelEval.load that scores moves
        teacher (FFTeacher): used to run the model when evaluating
        evaluation (bool): when True no experiences are stored and learn()
            only prints a comparison; when False learn() stores experiences
        replay (WritableReplayBuffer, optional): destination for experiences;
            None when evaluation is True
        encoder (Encoder): encodes (game state, move) pairs for the model
    """

    def __init__(self, entity_iden: int, replay_path=REPLAY_FOLDER, evaluation=False):
        """Load the evaluation model and, unless evaluating, open the replay buffer.

        Args:
            entity_iden (int): identifier of the entity to control
            replay_path: location passed to the file-backed replay buffer
            evaluation (bool): True to skip creating the replay buffer
        """
        self.entity_iden = entity_iden

        model_missing = not os.path.exists(EVAL_MODELFILE)
        if model_missing:
            # presumably writes EVAL_MODELFILE so the load below succeeds
            _init_model()
        self.model = Deep1ModelEval.load(EVAL_MODELFILE)

        self.teacher = FFTeacher()
        self.evaluation = evaluation
        self.encoder = init_encoder(entity_iden)
        self.replay = (
            None if evaluation
            else replay_buffer.FileWritableReplayBuffer(replay_path, exist_ok=True)
        )

    def __call__(self, entity_iden):
        """Retarget the bot at another entity and rebuild its encoder."""
        self.entity_iden = entity_iden
        self.encoder = init_encoder(entity_iden)

    @property
    def cutoff(self):
        """The module-level CUTOFF constant."""
        return CUTOFF

    @property
    def alpha(self):
        """The module-level ALPHA constant."""
        return ALPHA

    def evaluate(self, game_state: GameState, move: Move):
        """Return the model's score for taking `move` in `game_state` as a float."""
        encoded = self.encoder.encode(game_state, move)
        # the teacher writes its answer into this one-element tensor
        out = torch.tensor([0.0], dtype=torch.float)
        self.teacher.classify(self.model, encoded, out)
        return float(out.item())

    def learn(self, game_state: GameState, move: Move, new_state: GameState,
              reward_raw: float, reward_pred: float) -> None:
        """Store the transition, or in evaluation mode just print a comparison.

        Args:
            game_state (GameState): the state before the move
            move (Move): the move that was taken
            new_state (GameState): the state after the move
            reward_raw (float): the first reward term; the only one stored
            reward_pred (float): the second reward term; only used in the
                printed comparison
        """
        if not self.evaluation:
            if self.entity_iden == game_state.player_1_iden:
                player_id = 1
            else:
                player_id = 2

            experience = replay_buffer.Experience(
                game_state, move, self.cutoff, new_state, reward_raw,
                player_id, None,
                self.encoder.encode(game_state, move).numpy(),
                self.encoder.encode(new_state, move).numpy())
            self.replay.add(experience)
            return

        predicted = self.evaluate(game_state, move)
        total = reward_raw + reward_pred
        print(
            f'predicted reward: {predicted:.2f} vs actual reward '
            f'{reward_raw:.2f} + {reward_pred:.2f} = {total:.2f}'
        )

    def save(self) -> None:
        """Do nothing; this bot has no state of its own to save."""
        pass
def main():
    """Entry point: train a small convolutional network on MNIST.

    Builds the network, loads the MNIST train/test data, registers the
    stopping, decay, periodic analysis and on-finish archiving trackers on a
    ``tnr.GenericTrainer`` and trains. All tracker output paths are located
    under ``SAVEDIR``.
    """
    def as_pwl(dataset):
        # same conversion pipeline for both the train and the test split
        return dataset.to_pwl().restrict_to(set(range(10))).rescale()

    def under_savedir(name):
        return os.path.join(SAVEDIR, name)

    def every_100(callback):
        return tnr.OnEpochCaller.create_every(callback, skip=100)

    # The FluentShape calls are made in layer order; do not reorder them.
    shape = cu.FluentShape(28 * 28)
    layers = [
        shape.unflatten_conv_(1, 28, 28),
        shape.conv_(5, 5, 5),
        shape.relu(),
        shape.maxpool_(2),
        shape.flatten_(invokes_callback=True),
        shape.linear_(shape.dims[0]),
        shape.tanh(),
        shape.linear_(OUTPUT_DIM),
        shape.tanh(),
    ]
    network = FeedforwardComplex(INPUT_DIM, OUTPUT_DIM, layers)

    train_pwl = as_pwl(MNISTData.load_train())
    test_pwl = as_pwl(MNISTData.load_test())

    layer_names = ('input', 'conv2d-relu', 'maxpool', 'tanh', 'output')
    plot_layers = (3,)

    teacher = FFTeacher()
    optimizer = torch.optim.Adam(
        [param for param in network.parameters() if param.requires_grad],
        lr=0.001)
    criterion = torch.nn.CrossEntropyLoss()
    trainer = tnr.GenericTrainer(
        train_pwl=train_pwl,
        test_pwl=test_pwl,
        teacher=teacher,
        batch_size=45,
        learning_rate=0.001,
        optimizer=optimizer,
        criterion=criterion,
    )

    pca3d_throughtrain.FRAMES_PER_TRAIN = 1
    pca3d_throughtrain.SKIP_TRAINS = 0
    pca3d_throughtrain.NUM_FRAME_WORKERS = 6

    dig = npmp.NPDigestor('train_one_complex', 35)

    def join_digestor(*args, **kwargs):
        return dig.join()

    dtt_dir = under_savedir('dtt')
    pca_dir = under_savedir('pca')
    pca3d_dir = under_savedir('pca3d')
    pr_dir = under_savedir('pr')
    svm_dir = under_savedir('svm')
    satur_dir = under_savedir('saturation')
    model_dir = under_savedir('trained_model')
    throughtrain_dir = under_savedir('pca_throughtrain')

    registrar = (
        trainer
        .reg(tnr.EpochsTracker())
        .reg(tnr.EpochsStopper(5))
        .reg(tnr.DecayTracker())
        .reg(tnr.DecayStopper(8))
        .reg(tnr.LRMultiplicativeDecayer())
        .reg(tnr.DecayOnPlateau())
        .reg(tnr.AccuracyTracker(5, 1000, True))
        .reg(every_100(dtt.during_training_ff(dtt_dir, True, dig)))
        # The periodic PCA trackers are disabled; their directories are
        # still registered for zipping below.
        # .reg(every_100(pca_3d.during_training(pca3d_dir, True, dig, plot_kwargs={'layer_names': layer_names})))
        # .reg(every_100(pca_ff.during_training(pca_dir, True, dig)))
        .reg(every_100(pr.during_training_ff(pr_dir, True, dig)))
        .reg(every_100(svm.during_training_ff(svm_dir, True, dig)))
        .reg(every_100(satur.during_training(satur_dir, True, dig)))
        .reg(every_100(tnr.save_model(model_dir)))
        .reg(pca3d_throughtrain.PCAThroughTrain(
            throughtrain_dir, layer_names, True, layer_indices=plot_layers))
        .reg(tnr.OnFinishCaller(join_digestor))
    )

    # Archive each output directory on finish, registered after the
    # digestor join above.
    zipped_dirs = (dtt_dir, pca_dir, pca3d_dir, pr_dir, svm_dir, satur_dir,
                   model_dir)
    for finished_dir in zipped_dirs:
        registrar = registrar.reg(tnr.ZipDirOnFinish(finished_dir))

    trainer.train(network)
    dig.archive_raw_inputs(under_savedir('digestor_raw.zip'))