# Standard-library imports used below; project-local names
# (TestExecutionDetails, TestDetails, TestStates, TestResults,
# ResultBaseModel, Network, ConfigurationManager, Group, Source,
# and the test creators/filters) are assumed to come from the
# surrounding package.
import os
import time
from datetime import datetime
from io import StringIO


def __setupStage(self):
    """ Create all objects within the database with defaults """
    self.executionDetails = TestExecutionDetails.create(
        testSuiteName=self.testSuiteName,
        executionName=self.executionName,
        scmidentifier='N/A',  # TODO: make me!
        shortFilterDesc=self.shortFilterDesc,
        testPackDescriptor=self.packFilterDesc,
        startTime=time.time(),
        endTime=-1,
        duration=-1,
        envservExecutionModeName='automated',  # FIXME: read from config
        hostNetAddress=Network.getIPAddressByInterface(),
        hostMacAddress=Network.getMacAddress(),
        tasVersion=os.environ['TAS_VERSION'])
    self.executionDetails.save()

    if not hasattr(self, 'source'):
        raise Exception("SQLLiteLogger: Source object has not been set")

    # Batch all per-test inserts into a single transaction.
    ResultBaseModel.Meta.database.set_autocommit(False)
    for group in self.source.groups:
        for test in group.tests:
            testDetails = TestDetails.get_or_create(
                testId=str(test.testId),
                module=str(test.testFile),
                testName=test.combineClassMethod(),
                invalid=test.invalid,
                manualInspection=test.manualInspection,
                docstrings="\n".join(test.docstrings),
                timeout=test.testTimeout)
            testState = TestStates.filter(sequenceId=test.state.index).get()
            testResults = TestResults.create(
                testDetailsRef=testDetails,
                executed=False,
                invalid=test.invalid,
                result=testState,
                error=test.error,
                startTime=-1,
                duration=-1,
                manualInspection=test.manualInspection,
                testExecutionDetailsRef=self.executionDetails,
                peerRef=None,
                iterationId=None)
            testResults.save()
            testDetails.save()
    ResultBaseModel.Meta.database.commit()
    ResultBaseModel.Meta.database.set_autocommit(True)
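# Minimal sketch of the batched-insert pattern used by __setupStage above:
# autocommit is disabled so the per-row create() calls land in one
# transaction, then a single commit flushes them. `db`, `Model`, and `rows`
# are hypothetical stand-ins for ResultBaseModel.Meta.database and the
# peewee models above; the try/finally is an addition that restores
# autocommit even if an insert fails.
def _batched_insert_sketch(db, Model, rows):
    db.set_autocommit(False)
    try:
        for fields in rows:
            Model.create(**fields)  # one INSERT per row, flushed at commit
        db.commit()
    finally:
        db.set_autocommit(True)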
def gatherTests(self):
    """ Discover all tests from all sources """
    tests, content = [], {}

    execution = ConfigurationManager().getConfiguration('execution').configuration
    if execution.scripts.enabled == 'true':
        if not os.path.exists(execution.scripts.PCDATA):
            raise Exception('Invalid configuration. Execution scripts do not exist')
        content['tools'] = execution.scripts.PCDATA

    if len(self.__testCreators) == 0:
        raise Exception('No enabled test creators found')
    for creator in self.__testCreators:
        newTests = creator.createTests()
        if len(newTests) == 0:
            raise Exception('%s provided no tests!' % creator.__class__.__name__)
        tests += newTests
        content[creator.execScriptName] = creator.execScriptLocation
        content[creator.srcName] = creator.srcLocation

    # Guard against duplicate test IDs across creators.
    testIds = set()
    for test in tests:
        if test.testId in testIds:
            raise Exception('Duplicate test ID %s' % test.testId)
        testIds.add(test.testId)
    if len(tests) == 0:
        raise Exception('No tests found!')
    del testIds

    defGroup = Group(self.DEFAULT_GROUP_NAME, self.DEFAULT_GROUP_DESC, tests)
    source = Source(self.DEFAULT_LOCATION, defGroup)

    descBuffer = StringIO()
    for testFilter in self.__testFilters:
        source = testFilter.filterTests(source)
        descBuffer.write(testFilter.getAppliedFilterDescription())
    desc = descBuffer.getvalue()
    descBuffer.close()

    testSuiteName = "FIXME"  # TODO: where does this come from?
    dateTime = datetime.fromtimestamp(time.time()).strftime('%Y-%m-%dT%H:%M:%S')
    macAddress = Network.getMacAddress()
    executionName = "%s_%s_%s" % (testSuiteName, dateTime, macAddress)

    self.__packDetails = (testSuiteName, str(desc),
                          self.__getShortFilterDesc(), executionName, dateTime)
    self.__makeTar(**content)
    return source, desc
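# gatherTests only assumes a small duck-typed interface from each creator.
# A minimal sketch of that interface follows; ExampleCreator and the paths
# below are illustrative, not part of the framework.
class ExampleCreator:
    execScriptName = 'example_exec'
    execScriptLocation = '/opt/tests/example/run.sh'
    srcName = 'example_src'
    srcLocation = '/opt/tests/example/src'

    def createTests(self):
        # Must return a non-empty list of Test objects, each with a unique
        # testId; otherwise gatherTests raises.
        raise NotImplementedError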
# Standard-library and third-party imports used below; MemoryBuffer,
# Network, ExpLrDecay, and SuperHexagonInterface are assumed to come
# from the surrounding project.
import os
import pickle
from time import time

import numpy as np
import torch
import torch.nn.functional as F


class Trainer:

    def __init__(self, capacity=500000, warmup_steps=50000, n_frames=4,
                 n_steps=3, n_atoms=21, v_min=-1, v_max=0, alpha=.6, beta=.4,
                 gamma=.99, hidden_size=512, device='cuda', batch_size=48,
                 lr=0.0000625 * 2, lr_decay=0.985, beta_converged=4000000,
                 update_target_net_every=16000, train_every=4, frame_skip=4):
        self.memory_buffer = MemoryBuffer(
            capacity, n_frames, n_steps,
            SuperHexagonInterface.frame_size,
            SuperHexagonInterface.frame_size_cropped,
            alpha, beta, gamma, device=device)

        # Online and target networks for distributional (C51-style) Q-learning.
        self.net = Network(n_frames, SuperHexagonInterface.n_actions, n_atoms, hidden_size).to(device)
        self.target_net = Network(n_frames, SuperHexagonInterface.n_actions, n_atoms, hidden_size).to(device)
        self.target_net.load_state_dict(self.net.state_dict())

        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=lr, eps=1.5e-4)
        self.lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
            self.optimizer, ExpLrDecay(lr_decay, min_factor=.1))

        self.batch_size = batch_size
        self.beta_converged = beta_converged
        self.update_target_net_every = update_target_net_every
        self.train_every = train_every
        self.frame_skip = frame_skip
        self.warmup_steps = warmup_steps
        self.n_steps = n_steps
        self.beta = beta
        self.gamma = gamma

        # Support of the return distribution: n_atoms equally spaced atoms
        # on [v_min, v_max].
        self.n_atoms = n_atoms
        self.v_min = v_min
        self.v_max = v_max
        self.delta_z = (v_max - v_min) / (n_atoms - 1)
        self.support = torch.linspace(v_min, v_max, n_atoms, dtype=torch.float, device=device)

        self.iteration = 0
        self.list_steps_alive = []
        self.losses = []
        self.kls = []
        self.times = []

        # Preallocated buffers for the distribution projection in
        # train_and_get_action.
        self._offset = torch.arange(0, batch_size * n_atoms, n_atoms, device=device).view(-1, 1)
        self._m = torch.empty((batch_size, n_atoms), device=device)
        self._longest_run = 0

    def set_lr(self, lr):
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = lr

    def reinit_priority_queue(self):
        self.memory_buffer.update_priorities(
            np.arange(self.memory_buffer.size, dtype=np.int64),
            np.ones(self.memory_buffer.size) * self.memory_buffer.priority_queue.max_value,
            False)
        self.memory_buffer.priority_queue.recompute_tree()

    def warmup(self, game, log_every):
        # Fill the replay buffer with random play before training starts.
        t = True
        for i in range(1, self.warmup_steps + 1):
            if i % log_every == 0:
                print('Warmup', i)
            if t:
                f, fc = game.reset()
                self.memory_buffer.insert_first(f, fc)
            a = np.random.randint(0, 3)
            (f, fc), r, t = game.step(a)
            self.memory_buffer.insert(a, r, t, f, fc)
        self.memory_buffer.update_priorities(
            np.arange(self.warmup_steps - self.n_steps, dtype=np.int64),
            np.ones(self.warmup_steps - self.n_steps) * np.log(self.n_atoms - 1),
            False)
        return t

    def train(self, save_every=50000, save_name='trainer', log_every=1000):
        save_when_terminal = False
        game = SuperHexagonInterface(self.frame_skip)

        # Current frame pair and the stacked frame histories fed to the net.
        f, fc = np.zeros(game.frame_size, dtype=bool), np.zeros(game.frame_size_cropped, dtype=bool)
        sf = np.zeros((1, 4, *game.frame_size), dtype=bool)
        sfc = np.zeros((1, 4, *game.frame_size_cropped), dtype=bool)

        t = True
        if self.iteration == 0:
            if os.path.exists('warmup_buffer_level3.npz'):
                self.memory_buffer.load_warmup('warmup_buffer_level3.npz')
            else:
                t = self.warmup(game, log_every)
                self.memory_buffer.save_warmup('warmup_buffer_level3.npz')

        last_time = time()
        while True:
            self.iteration += 1

            if self.iteration % log_every == 0 and len(self.list_steps_alive) > 0:
                print(f'{self.iteration} | '
                      f'{np.mean(self.list_steps_alive[-100:]) / 60:.2f}s | '
                      f'{self._longest_run / 60:.2f}s | '
                      f'{time() - last_time:.2f}s | '
                      f'{np.mean(self.losses[-log_every:])} | '
                      f'{np.mean(self.kls[-log_every:])} | '
                      f'{self.lr_scheduler.get_last_lr()[0]}')
                # last_time = time()

            if self.iteration % save_every == 0:
                save_when_terminal = True

            if self.iteration % self.update_target_net_every == 0:
                self.memory_buffer.beta = min(
                    1., self.beta + (1 - self.beta) * self.iteration / self.beta_converged)
                self.lr_scheduler.step()
                self.target_net.load_state_dict(self.net.state_dict())

            if t:
                # game.recorder.start()
                f, fc = game.reset()
                self.memory_buffer.insert_first(f, fc)
                sf[:] = 0
                sfc[:] = 0
            # Shift the frame stacks and insert the newest frame at slot 0.
            sf[0, 1:] = sf[0, :-1]
            sfc[0, 1:] = sfc[0, :-1]
            sf[0, 0] = f
            sfc[0, 0] = fc

            if self.iteration % self.train_every == 0:
                a, loss, kl = self.train_and_get_action(True)
                self.losses.append(loss)
                self.kls.append(kl)
            else:
                with torch.no_grad():
                    self.net.reset_noise()
                    a = (self.net(
                        torch.from_numpy(sf).cuda().float(),
                        torch.from_numpy(sfc).cuda().float()
                    ) * self.support).sum(dim=2).argmax(dim=1).item()

            (f, fc), r, t = game.step(a)
            self.memory_buffer.insert(a, r, t, f, fc)

            if t:
                if game.steps_alive > self._longest_run:
                    self._longest_run = game.steps_alive
                # game.recorder.stop()
                # if game.steps_alive >= 60 * 60 and game.steps_alive > self._longest_run * .7:
                #     game.recorder.save(f'superhexagon_{int(time())}', 60)
                #     game.recorder.start()
                self.list_steps_alive.append(game.steps_alive)
                self.times.append(time() - last_time)
                if save_when_terminal:
                    print('saving...')
                    self.memory_buffer.priority_queue.recompute_tree()
                    self.save(save_name)
                    save_when_terminal = False

    def train_and_get_action(self, renew_action):
        f, fc, a, r, t, f1, fc1, w, idx = self.memory_buffer.make_batch(
            self.batch_size - int(renew_action), include_last_insertion=renew_action)

        # Double-DQN action selection: the online net picks the next action,
        # the target net supplies the next-state distribution.
        with torch.no_grad():
            self.target_net.reset_noise()
            self.net.reset_noise()
            an = (self.net(f1, fc1) * self.support).sum(dim=2).argmax(dim=1)
            qdn = self.target_net(f1, fc1)

        # Experimental two-stream variant, kept for reference:
        '''
        with torch.no_grad():
            torch.cuda.synchronize()
            with torch.cuda.stream(self.cuda_stream_1):
                self.target_net.reset_noise()
                qdn = self.target_net(f1, fc1)
            with torch.cuda.stream(self.cuda_stream_2):
                self.net.reset_noise()
                an = (self.net(f1, fc1) * self.support).sum(dim=2).argmax(dim=1)
            torch.cuda.synchronize()
        '''

        # Project the n-step Bellman target distribution onto the fixed support.
        # if self.iteration <= self.update_target_net_every:
        #     Tz = r.unsqueeze(1).expand((self.batch_size, self.n_atoms)).clamp(self.v_min, self.v_max)
        # else:
        Tz = (r.unsqueeze(1)
              + t.logical_not().unsqueeze(1) * self.gamma**self.n_steps * self.support
              ).clamp_(self.v_min, self.v_max)
        b = (Tz - self.v_min) / self.delta_z
        l = b.floor().long()
        u = b.ceil().long()
        l[(u > 0) & (l == u)] -= 1
        u[(l == u)] += 1

        vdn = qdn.gather(
            1, an.view(-1, 1, 1).expand(self.batch_size, -1, self.n_atoms)
        ).view(self.batch_size, self.n_atoms)
        self._m.zero_()
        self._m.view(-1).index_add_(0, (l + self._offset).view(-1), (vdn * (u - b)).view(-1))
        self._m.view(-1).index_add_(0, (u + self._offset).view(-1), (vdn * (b - l)).view(-1))

        qld = self.net(f, fc, log=True)
        vld = qld.gather(
            1, a.view(-1, 1, 1).expand(self.batch_size, -1, self.n_atoms)
        ).view(self.batch_size, self.n_atoms)

        # Cross-entropy between projected target and predicted log-distribution,
        # weighted by the importance-sampling weights w.
        ce = -torch.sum(self._m * vld, dim=1)
        loss = (w * ce).mean()
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # New priorities: KL divergence between target and predicted distributions.
        kl = F.kl_div(
            vld.detach(), self._m,
            reduction='none').sum(dim=1).clamp(min=.001).cpu().numpy()
        self.memory_buffer.update_priorities(idx, kl, renew_action)

        return (qld[0].detach().exp() * self.support).sum(dim=1).argmax().item(), \
            loss.detach().item(), kl.mean().item()

    def save(self, file_name='trainer'):
        # Keep the previous checkpoint as a backup until the new one is written.
        file_name_backup = file_name + '_backup'
        if os.path.exists(file_name):
            os.rename(file_name, file_name_backup)
        with open(file_name, 'wb') as f:
            pickle.dump(self, f)
        if os.path.exists(file_name_backup):
            os.remove(file_name_backup)

    @staticmethod
    def load(file_name='trainer'):
        with open(file_name, 'rb') as f:
            ret = pickle.load(f)
        assert ret.memory_buffer.last_was_terminal
        return ret
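# Hedged usage sketch: resume from an existing checkpoint if one is present,
# otherwise start a fresh run. The checkpoint name 'trainer' matches the
# defaults of save()/load(); all other parameters use the defaults above.
if __name__ == '__main__':
    if os.path.exists('trainer'):
        trainer = Trainer.load('trainer')
    else:
        trainer = Trainer()
    trainer.train(save_every=50000, save_name='trainer', log_every=1000)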