def make_embed(self, page):
    """Build the bot's info embed for one page of the command listing.

    The embed contains the author, the number of guilds this bot instance
    is a member of, one field per entry of ``messages.info[page - 1]``
    (entry[0] is the command name, entry[1] its description) and a footer
    with the current commit hash. The footer is prefixed with the page
    number only when ``messages.info`` holds more than one page.

    :param page: page number; ``page - 1`` is used as index into
        ``messages.info``.
    :return: the populated ``discord.Embed``.
    """
    card = discord.Embed(
        title="Rubbergod",
        description="Nejlepší a nejúžasnější bot ever.",
        color=0xeee657,
    )
    command_prefix = config.default_prefix

    card.add_field(name="Autor", value="Toaster#1111")

    # Number of servers (guilds) this bot instance has joined.
    guild_count = len(self.bot.guilds)
    card.add_field(
        name="Počet serverů s touto instancí bota",
        value=f"{guild_count}",
    )

    # Zero-width-space name: acts as a heading row for the command list.
    card.add_field(name="\u200b", value="Příkazy:", inline=False)

    for entry in messages.info[page - 1]:
        card.add_field(
            name=command_prefix + entry[0],
            value=entry[1],
            inline=False,
        )

    commit = utils.git_hash()
    commit_text = f"Commit {commit}"
    has_several_pages = len(messages.info) > 1
    footer = f"Page {page} | {commit_text}" if has_several_pages else commit_text
    card.set_footer(text=footer, icon_url=self.bot.user.avatar_url)

    return card
def __init__(self, bot: Bot):
    """Initialise the base class and prepare the bot's "playing" activity.

    The activity name is the configured default prefix followed by
    ``god | Running hash <first 7 characters of the git hash>``; its start
    time is the current (naive) UTC time.

    :param bot: forwarded unchanged to the parent initialiser.
    """
    super().__init__(bot)

    started_at = datetime.datetime.utcnow()
    short_hash = utils.git_hash()[:7]
    status_text = config.Config.default_prefix + 'god' ' | Running hash ' + short_hash
    self.activity = discord.Game(start=started_at, name=status_text)
voids = "_voids" if not args.include_voidless else '' trn_labels_fpath = osp.join(LABEL_DIR, TRN_VIDEO_ID + voids + '.txt') print("Training set:", trn_labels_fpath) img_dir_test = osp.join(IMAGE_DIR, VAL_VIDEO_ID) val_labels_fpath = osp.join(LABEL_DIR, VAL_VIDEO_ID + voids + '.txt') print("Validation set:", val_labels_fpath) shuffle = not DEBUG os.makedirs(CKPT_DIR, exist_ok=True) gpu_name = get_gpu_names()[args.gpu] trn_name = osp.basename(trn_labels_fpath) val_name = osp.basename(val_labels_fpath) arch_name = ARCH.__name__ sqlite_path = "database.sqlite3" trial_id = get_trial_id(sqlite_path) if not args.test_code else -1 git = git_hash() print("Trial ID:", trial_id) with torch.cuda.device(args.gpu): # Model print('==> Building model..') net = ARCH(**ARCH_KWARGS) net.cuda() cudnn.benchmark = True # WARNING: Don't use if using images w/ diff shapes # TODO: Check for this condition automatically best_loss = float('inf') # best test loss start_epoch = 0 # start from epoch 0 or last epoch criterion = SSDLoss() lr = 1e-3 momentum = 0.9 weight_decay = 1e-4 optimizer = optim.SGD(net.parameters(),
def train(
        rank,
        world_size,
        save_dir,
        experiment_name,
        model,
        loss_func,
        opt,
        get_train,
        get_crossval,
        batch_queue,
        experimentsettings,
        trainsettings={},
        git_hash=None,
        timestring=None,
):
    """Run the distributed training loop for one worker process.

    The process joins a ``gloo`` process group and then walks over batches
    built from ``get_train()``. Within a batch, the first ``world_size``
    points are assigned one per rank; rank 0 pushes the remaining points
    onto ``batch_queue`` and every rank pulls from it until it is empty.
    Gradients of the error loss are combined across ranks with
    ``dist.all_reduce``; only rank 0 calls ``opt.step()``.

    Rank 0 additionally prints progress, saves checkpoints and writes
    TensorBoard scalars / stats / weights at the configured intervals.
    Whenever the epoch number increases, every rank takes part in
    ``log_crossval``.

    A ``KeyboardInterrupt`` raised inside the loop is swallowed so the
    function returns quietly.

    Args:
        rank: index of this process in the process group.
        world_size: number of processes in the process group.
        save_dir: forwarded to ``log_experiment_info`` and ``save_model``.
        experiment_name: base name; ``-<timestring>`` is appended.
        model: called as ``model([x])``; must provide ``parameters()``.
        loss_func: called as ``loss_func(preds, target)`` and must return
            the pair ``(errloss, regloss)``.
        opt: optimizer; ``step()`` runs on rank 0, ``zero_grad()`` on all
            ranks.
        get_train: zero-argument callable returning an iterable of points,
            called once per epoch. Each point is indexed as ``point[0]``
            (model input) and ``point[1]`` (label).
        get_crossval: forwarded to ``log_crossval``.
        batch_queue: queue shared between the ranks (used via ``put``,
            ``get(block=False)``, ``empty`` and ``qsize``).
        experimentsettings: object providing ``batchsize``.
        trainsettings: object providing ``numepochs`` (``-1`` means train
            forever), ``testingbatchsize``, ``loss_every_i_batches``,
            ``stats_every_i_batches``, ``save_every_i_batches``,
            ``weights_every_i_batches`` and ``start_from_batch``. These are
            read as attributes, so the ``{}`` default is not usable as-is;
            callers must pass a real settings object.
        git_hash: commit hash to record; ``utils.git_hash()`` when None.
        timestring: run timestamp; derived from the current time when None.

    Raises:
        RuntimeError: if ``utils.is_git_clean()`` reports uncommitted
            changes.
    """
    dist.init_process_group('gloo',
                            init_method='env://',
                            world_size=world_size,
                            rank=rank)
    torch.set_num_threads(1)

    if not utils.is_git_clean():
        raise RuntimeError('Ensure that all changes have been committed!')
    git_hash = utils.git_hash() if git_hash is None else git_hash
    short_git_hash = git_hash[:7]

    # Start time string
    now = datetime.datetime.now()
    timestring = now.strftime(
        '%Y%m%d%a-%H%M%S') if timestring is None else timestring
    experiment_name = '{}-{}'.format(experiment_name, timestring)

    batchsize = experimentsettings.batchsize
    numepochs = trainsettings.numepochs
    testingbatchsize = trainsettings.testingbatchsize
    loss_every_i_batches = trainsettings.loss_every_i_batches
    stats_every_i_batches = trainsettings.stats_every_i_batches
    save_every_i_batches = trainsettings.save_every_i_batches
    weights_every_i_batches = trainsettings.weights_every_i_batches
    start_from_batch = trainsettings.start_from_batch

    # Log experiment information (only for a fresh run, not when resuming)
    if start_from_batch == 0:
        log_experiment_info(save_dir, experiment_name, loss_func, git_hash,
                            now, experimentsettings, trainsettings)

    # The summary writer exists on rank 0 only; every later use of `writer`
    # is guarded by `rank == 0`.
    if rank == 0:
        writer = torchboard.SummaryWriter(log_dir='runs/trainv1-{}-{}'.format(
            short_git_hash, experiment_name))

    try:
        # Compare by value: `is not -1` is an identity test on an int
        # literal (SyntaxWarning on Python >= 3.8, implementation-defined).
        epoch_iterator = range(
            numepochs) if numepochs != -1 else itertools.count()
        # Lazily yields (epoch, point); get_train() is re-invoked per epoch.
        dataset_iterator = ((epoch, point) for epoch in epoch_iterator
                            for point in get_train())
        batch_iterator = utils.group_into(dataset_iterator, batchsize)

        stats = datautils.Stats()
        global_counter = batchsize * start_from_batch
        epoch = None

        # islice skips (but still consumes) the first start_from_batch
        # batches so that batchnum keeps its absolute value when resuming.
        for batchnum, batch__ in itertools.islice(enumerate(batch_iterator),
                                                  start_from_batch, None):
            batch__ = list(batch__)
            cur_epoch = batch__[0][0]

            # NOTE: on the very first iteration `epoch` is still None here,
            # so the message and the checkpoint below receive None.
            if rank == 0:
                print('(e: {} i: {}) Training batch #{}'.format(
                    epoch, global_counter, batchnum))

            if rank == 0 and batchnum % save_every_i_batches == 0:
                save_model(model, opt, save_dir, short_git_hash,
                           experiment_name, epoch, global_counter)

            if epoch is None:
                epoch = cur_epoch

            # A new epoch has started: evaluate on the cross-validation set.
            if cur_epoch > epoch:
                if rank == 0:
                    print('(e: {} i: {}) Testing cross-validation set'.format(
                        epoch, global_counter))
                epoch = cur_epoch
                errloss, regloss, acc, precision, recall, fscore = log_crossval(
                    rank, world_size, model, loss_func, get_crossval,
                    batch_queue, testingbatchsize, global_counter)
                if rank == 0:
                    writer.add_scalar('Err-Loss/crossval', errloss,
                                      global_counter)
                    writer.add_scalar('Reg-Loss/crossval', regloss,
                                      global_counter)
                    writer.add_scalar('Accuracy/crossval', acc,
                                      global_counter)
                    writer.add_scalar('Precision/crossval', precision,
                                      global_counter)
                    writer.add_scalar('Recall/crossval', recall,
                                      global_counter)
                    writer.add_scalar('F-Score/crossval', fscore,
                                      global_counter)

            if rank == 0 and batchnum % stats_every_i_batches == 0:
                log_stats(writer, stats, global_counter)
                stats = datautils.Stats()

            if rank == 0 and batchnum % weights_every_i_batches == 0:
                log_weights(writer, model, global_counter)

            dist.barrier()

            # Why do batching when most of the time is spent computing gradients?
            # The runtime of the forward direction is roughly linear in the number of created tensors,
            # thanks to caching
            # So by making each process spend roughly more equal amounts of time in the forward direction,
            # we indirectly make the # of created tensors more equal,
            # which indirectly makes the time spent computing gradients more equal.
            # The speed up isn't perfect, but it's still a speedup (3x speedup on 8 cores for relatively small batch sizes like 64)
            # Speed up will be better if the forward direction takes more time than the backward direction,
            # so speed up should be better on larger batch sizes.

            # Make sure that every process gets at least one point.
            # Elements of batch__ are (epoch, point); this indexing requires
            # the batch to hold at least world_size points.
            firstpoint = batch__[rank][1]

            if rank == 0:
                # Put the remaining points in the queue
                for point in batch__[world_size:]:
                    batch_queue.put(point[1])

            # If the queue is meant to be populated,
            # make sure everyone sees something in the queue before starting
            if len(batch__) > world_size:
                while batch_queue.empty() or batch_queue.qsize() == 0:
                    continue
            dist.barrier()

            batchy = []
            preds = []

            firstpred = model([firstpoint[0]])
            batchy.append(firstpoint[1])
            preds.append(firstpred)

            # Drain the shared queue; each rank processes whatever it grabs.
            while True:
                try:
                    point = batch_queue.get(block=False)
                    pred = model([point[0]])
                    batchy.append(point[1])
                    preds.append(pred)
                except queue.Empty:
                    # Only stop once both emptiness checks agree.
                    if batch_queue.qsize() == 0 and batch_queue.empty():
                        break

            preds = torch.cat(preds)

            for pred, y in zip(preds, batchy):
                with torch.no_grad():
                    stats = datautils.update_stats(stats, bool(pred > 0), y)

            target = torch.tensor([1. if y else 0. for y in batchy])
            errloss, regloss = loss_func(preds, target)

            errloss.backward()
            # Weight the local gradient by the number of points this rank
            # handled, sum over all ranks, then divide by the full batch
            # size. NOTE(review): this is a global mean only if loss_func
            # averages errloss over its inputs - confirm against loss_func.
            for param in model.parameters():
                param.grad.data *= len(preds)
                dist.all_reduce(param.grad.data)
                param.grad.data /= len(batch__)
            # The regularisation gradient is added after the reduction, so
            # it is not summed across ranks.
            regloss.backward()

            if rank == 0 and batchnum % loss_every_i_batches == 0:
                print('(e: {} i: {}) Loss: {:.5f}\t{:.5f}'.format(
                    epoch, global_counter, errloss.item(), regloss.item()))
                writer.add_scalar('Err-Loss/train', errloss.item(),
                                  global_counter)
                writer.add_scalar('Reg-Loss/train', regloss.item(),
                                  global_counter)

            dist.barrier()
            if rank == 0:
                opt.step()
            opt.zero_grad()

            global_counter += len(batch__)
    except KeyboardInterrupt:
        # Deliberate: Ctrl-C ends training without a traceback.
        pass
def main(fn: Path, testrun=False):
    """Run a new simulation or resume an existing one, saving periodically.

    All files are derived from ``fn`` via ``with_suffix``: ``.yaml``
    (parameters), ``.bin`` (simulation archive), ``.extra.json`` (extra
    data, with ``.bak`` copies made when resuming), ``.energylog.csv``
    (heartbeat log) and ``.lock``.

    If ``<fn>.bin`` does not exist a fresh simulation is configured and
    populated from the initial-conditions file; otherwise the last snapshot
    of the archive is loaded and integration continues from there.

    The main loop integrates in steps of ``per_savestep``, transfers the
    events recorded by the C heartbeat library into ``extradata``, and
    writes a snapshot plus the extra data after every step.

    Args:
        fn: base path of the run.
        testrun: when true and no archive exists, use built-in test
            parameters instead of the YAML file and shorten the run.

    Raises:
        FileExistsError: when resuming while ``<fn>.lock`` exists.
        SystemExit: with code 1 when the module-level ``abort`` flag is set
            (no particles left, or an exception inside the collision
            handler). The lock file is not removed in that case.
    """
    global abort
    start = time.perf_counter()

    if not fn.with_suffix(".bin").exists():
        if not testrun:
            with open(fn.with_suffix(".yaml")) as f:
                parameters = Parameters(**yaml.safe_load(f))
        else:
            parameters = Parameters(
                massloss_method="rbf",
                initcon_file="initcon/conditions_many.input")

        # set up a fresh simulation
        sim = Simulation()

        sim.units = ('yr', 'AU', 'kg')
        # sim.boundary = "open"
        # boxsize = 100
        # sim.configure_box(boxsize)
        sim.integrator = "mercurius"
        sim.dt = 1e-2
        sim.ri_ias15.min_dt = 0.0001 / 365
        if not parameters.no_merging:
            sim.collision = "direct"
        sim.ri_mercurius.hillfac = 3.
        sim.testparticle_type = 1

        tmax = 200 * mega
        num_savesteps = 20000
        if testrun:
            # Shrink the run so a test finishes quickly.
            tmax /= 200000
            num_savesteps /= 1000
        per_savestep = tmax / num_savesteps

        extradata = ExtraData()
        # times = np.linspace(0., tmax, savesteps)
        extradata.meta.tmax = tmax
        extradata.meta.per_savestep = per_savestep
        extradata.meta.num_savesteps = num_savesteps
        extradata.meta.git_hash = git_hash()
        extradata.meta.rebound_hash = rebound.__githash__
        extradata.meta.massloss_method = parameters.massloss_method
        extradata.meta.initcon_file = parameters.initcon_file
        extradata.meta.no_merging = parameters.no_merging

        num_planetesimals, num_embryos = \
            add_particles_from_conditions_file(sim, extradata,
                                               parameters.initcon_file,
                                               testrun)

        sim.move_to_com()
        extradata.meta.initial_N = sim.N
        extradata.meta.initial_N_planetesimal = num_planetesimals
        extradata.meta.initial_N_embryo = num_embryos
        extradata.history.append(energy=sim.calculate_energy(),
                                 momentum=total_momentum(sim),
                                 total_mass=total_mass(sim),
                                 time=sim.t,
                                 N=sim.N,
                                 N_active=sim.N_active)
        cputimeoffset = walltimeoffset = 0
        t = 0
    else:
        if fn.with_suffix(".lock").exists():
            raise FileExistsError(
                "Lock file found, is the simulation currently running?")
        # Keep backups of the previous state before appending to it.
        copy(fn.with_suffix(".bin"), fn.with_suffix(".bak.bin"))
        copy(fn.with_suffix(".extra.json"), fn.with_suffix(".extra.bak.json"))
        sa = SimulationArchive(str(fn.with_suffix(".bin")))
        extradata = ExtraData.load(fn)
        tmax = extradata.meta.tmax
        per_savestep = extradata.meta.per_savestep
        sim = sa[-1]
        t = round(sim.t + per_savestep)
        print(f"continuing from {t}")
        sim.move_to_com()
        sim.ri_mercurius.recalculate_coordinates_this_timestep = 1
        sim.integrator_synchronize()

        # Query the current hash once instead of once per use.
        current_hash = git_hash()
        if extradata.meta.git_hash != current_hash:
            print(
                "warning: The saved output was originally run with another version of the code"
            )
            print(f"original: {extradata.meta.git_hash}")
            print(f"current: {current_hash}")
        num_savesteps = extradata.meta.num_savesteps
        cputimeoffset = extradata.meta.cputime
        walltimeoffset = extradata.meta.walltime

    # Load the C heartbeat library and point it at this run's log file.
    check_heartbeat_needs_recompile()
    clibheartbeat = cdll.LoadLibrary("heartbeat/heartbeat.so")
    clibheartbeat.init_logfile.argtypes = [c_char_p]
    logfile = create_string_buffer(128)
    logfile.value = str(fn.with_suffix(".energylog.csv")).encode()
    clibheartbeat.init_logfile(logfile)
    sim.heartbeat = clibheartbeat.heartbeat

    innermost_semimajor_axis = third_kepler_law(
        orbital_period=sim.dt * year *
        MIN_TIMESTEP_PER_ORBIT) / astronomical_unit * 1.1
    print(f"innermost semimajor axis is {innermost_semimajor_axis}")

    # Distance limits are handed to the C side as squared values.
    c_double.in_dll(
        clibheartbeat,
        "min_distance_from_sun_squared").value = innermost_semimajor_axis**2
    c_double.in_dll(clibheartbeat,
                    "max_distance_from_sun_squared").value = 150**2

    assert sim.dt < innermost_period(sim) / MIN_TIMESTEP_PER_ORBIT

    def collision_resolve_handler(sim_p: POINTER_REB_SIM,
                                  collision: reb_collision) -> int:
        """Merge colliding particles; on any error flag abort and stop."""
        global abort  # needed as exceptions don't halt integration
        try:
            return merge_particles(sim_p, collision, ed=extradata)
        except BaseException as exception:
            print("exception during collision_resolve")
            print(exception)
            abort = True
            sim_p.contents._status = 1
            raise

    sim.collision_resolve = collision_resolve_handler

    # show_orbits(sim)

    fn.with_suffix(".lock").touch()
    print("start")

    while t <= tmax:
        print()
        print(f"{t / tmax * 100:.2f}%")
        set_process_title(fn, t / tmax, sim.N)
        try:
            print(f"integrating until {t}")
            sim.integrate(t, exact_finish_time=0)
            print("dt", sim.dt)
            print("t", t)
            t += per_savestep
        except NoParticles:
            # Still fall through so the final state is saved before exiting.
            print("No Particles left")
            abort = True
        print("N", sim.N)
        print("N_active", sim.N_active)

        print("fraction", innermost_period(sim) / MIN_TIMESTEP_PER_ORBIT)
        assert sim.dt < innermost_period(sim) / MIN_TIMESTEP_PER_ORBIT

        escape: hb_event
        wide_orbit: hb_event
        sun_collision: hb_event

        # Copy events recorded by the heartbeat library into extradata and
        # reset the corresponding index on the C side afterwards.
        for escape in hb_event_list.in_dll(clibheartbeat, "hb_escapes"):
            if not escape.new:
                continue
            print("escape:", escape.time, escape.hash)
            extradata.pdata[escape.hash].escaped = escape.time
            escape.new = 0  # make sure to not handle it again
        c_int.in_dll(clibheartbeat, "hb_escape_index").value = 0

        for sun_collision in hb_event_list.in_dll(clibheartbeat,
                                                  "hb_sun_collisions"):
            if not sun_collision.new:
                continue
            print("sun collision:", sun_collision.time, sun_collision.hash)
            extradata.pdata[
                sun_collision.hash].collided_with_sun = sun_collision.time
            sun_collision.new = 0
        c_int.in_dll(clibheartbeat, "hb_sun_collision_index").value = 0

        for wide_orbit in hb_event_list.in_dll(clibheartbeat,
                                               "hb_wide_orbits"):
            if not wide_orbit.new:
                continue
            print("wide orbit:", wide_orbit.time, wide_orbit.hash)
            extradata.pdata[wide_orbit.hash].wide_orbit = wide_orbit.time
            wide_orbit.new = 0
        # NOTE(review): this resets the *sun collision* index a second time;
        # by analogy with the two loops above a wide-orbit index was
        # probably intended. Left unchanged because the exported symbol
        # name cannot be confirmed from here - check heartbeat's C source.
        c_int.in_dll(clibheartbeat, "hb_sun_collision_index").value = 0

        sim.simulationarchive_snapshot(str(fn.with_suffix(".bin")))
        extradata.meta.walltime = time.perf_counter() - start + walltimeoffset
        extradata.meta.cputime = time.process_time() + cputimeoffset
        extradata.meta.current_time = t
        extradata.history.append(energy=sim.calculate_energy(),
                                 momentum=total_momentum(sim),
                                 total_mass=total_mass(sim),
                                 time=sim.t,
                                 N=sim.N,
                                 N_active=sim.N_active)
        extradata.save(fn)
        if abort:
            print("aborted")
            # Equivalent to exit(1) but does not rely on the helper that the
            # `site` module injects for interactive use.
            raise SystemExit(1)
    print("finished")

    fn.with_suffix(".lock").unlink()