def display_status(data): message = "Application version: " + config.APPLICATION_VERSION common.log_output(message, True) message = "Config file: " + config.CONFIG_FILE common.log_output(message, True) if data.get("parameters_file", None) is not None: param_file = data.get("parameters_file", "") message = "Params file: " + param_file common.log_output(message, True) token = data.get("token", None) token_expires = data.get("token_expires", None) if token is None or token_expires is None: common.log_output("Not logged in", True) sys.exit(2) if data.get("last_login", None) is not None: last_login = data.get("last_login", "") message = "Logged in : " + helper.format_time(last_login) common.log_output(message, True) if token_expires is not None: message = "Expiration : " + helper.format_time(token_expires) common.log_output(message, True)
def display_status(data): message = "Server : " + common.create_baseurl(data) common.log_output(message, True) server_activity, backup_id = fetch_progress_state(data) message = "Server status: " if server_activity.get("OverallProgress", 1) != 1: message += server_activity.get("Phase", None) message += " on backup " + backup_id else: message += "Idle" common.log_output(message, True) message = "Config file : " + config.CONFIG_FILE common.log_output(message, True) if data.get("parameters_file", None) is not None: param_file = data.get("parameters_file", "") message = "Params file : " + param_file common.log_output(message, True) token = data.get("token", None) token_expires = data.get("token_expires", None) if token is None or token_expires is None: common.log_output("Not logged in", True) sys.exit(2) if data.get("last_login", None) is not None: last_login = data.get("last_login", "") message = "Logged in : " + helper.format_time(last_login) common.log_output(message, True) if token_expires is not None: message = "Expiration : " + helper.format_time(token_expires) common.log_output(message, True)
def list_filter(json_input, resource):
    resource_list = []
    if resource == "backups":
        for key in json_input:
            backup = key.get("Backup", None)
            schedule = key.get("Schedule", None)
            progress_state = key.get("Progress", None)
            backup_name = backup.get("Name", "")
            # Grab the metadata before "backup" is rebound to the output
            # dict below; looking it up afterwards always misses.
            metadata = backup.get("Metadata", {})
            backup = {
                backup_name: {
                    "ID": backup.get("ID", ""),
                }
            }
            if metadata.get("SourceSizeString") is not None:
                backup[backup_name]["Source size"] = metadata.get("SourceSizeString")
            if schedule is not None:
                next_run = helper.format_time(schedule.get("Time", ""))
                if next_run is not None:
                    backup[backup_name]["Next run"] = next_run
                last_run = helper.format_time(schedule.get("LastRun", ""))
                if last_run is not None:
                    backup[backup_name]["Last run"] = last_run
            if progress_state is not None:
                backup[backup_name]["Running"] = {
                    "Task ID": progress_state.get("TaskID", None),
                    "State": progress_state.get("Phase", None),
                }
            resource_list.append(backup)
    elif resource == "notifications":
        for val in json_input:
            notification = {
                val.get("Title", ""): {
                    "Backup ID": val.get("BackupID", ""),
                    "Notification ID": val.get("ID", ""),
                }
            }
            timestamp = helper.format_time(val.get("Timestamp", ""))
            if timestamp is not None:
                notification["Timestamp"] = timestamp
            resource_list.append(notification)
    elif resource == "serversettings":
        for key, value in json_input.items():
            if key == "update-check-latest":
                continue
            setting = {key: {"value": value}}
            resource_list.append(setting)
    else:
        resource_list = json_input
    return resource_list
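# Usage sketch for list_filter above. The input literal is hypothetical
# sample data mirroring the shape the function expects; the real caller
# feeds it the decoded JSON from the server's backup-list endpoint.
raw = [{
    "Backup": {
        "ID": "1",
        "Name": "Documents",
        "Metadata": {"SourceSizeString": "1.5 GB"},
    },
    "Schedule": None,
    "Progress": None,
}]
print(list_filter(raw, "backups"))
# -> [{'Documents': {'ID': '1', 'Source size': '1.5 GB'}}]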
def ui_update_recipe(self):
    if self.cook.command is not None and self.cook.is_running:
        if self.cook.message:
            self.ui_message.set_text(self.cook.message)
        else:
            self.ui_message.set_text(self.cook.name)
        self.ui_operation.set_text(self.cook.command.loc_name)

        args = u""
        for key, value in self.cook.command.args.iteritems():
            args = args + key + ":" + unicode(value) + u" , "
        if args:
            args = args[:-3]
        self.ui_operation_args.set_text(args)

        self.ui_time1.set_text(helper.format_time(self.cook.command_time))
        self.ui_time2.set_text(
            helper.format_time(self.cook.command_total_time)
            if self.cook.command_total_time else "-")
        self.ui_time3.set_text(helper.format_time(self.cook.time))
        self.ui_time4.set_text(
            helper.format_time(self.cook.total_time)
            if self.cook.total_time else "-")
        self.ui_progress.set_fraction(self.cook.time / self.cook.total_time)
        self.ui_operation_state.set_text(cook.State.repr(self.cook.state))
        self.ui_status.set_text(cook.State.repr(self.cook.state))
    else:
        self.ui_message.set_text(self.cook.name)
        self.ui_operation.set_text("-")
        self.ui_operation_args.set_text("")
        self.ui_time1.set_text("")
        self.ui_time2.set_text("")
        self.ui_time3.set_text("")
        self.ui_time4.set_text("")
        self.ui_progress.set_fraction(0.0)
        self.ui_operation_state.set_text("")
        self.ui_status.set_text("")

    self.ui_freq1.set_text("%.1f" % (self.cook.device.cp_freq_current / 100.0))
    self.ui_freq2.set_text("%.1f" % (self.cook.device.cp_freq_current / 100.0))
    self.ui_par.set_text("")
    self.ui_direction.set_text(cp2000.Direction.repr(self.cook.device.direction))
def get_live_logs(data, level, page_size, first_id, output_type):
    baseurl = common.create_baseurl(data, "/api/v1/logdata/poll")
    cookies = common.create_cookies(data)
    headers = common.create_headers(data)
    verify = data.get("server", {}).get("verify", True)
    params = {'level': level, 'id': first_id, 'pagesize': page_size}
    r = requests.get(baseurl, headers=headers, cookies=cookies,
                     params=params, verify=verify)
    common.check_response(data, r.status_code)
    if r.status_code == 500:
        message = "Error getting log, "
        message += "database may be locked by backup"
        common.log_output(message, True)
        return
    elif r.status_code != 200:
        common.log_output("Error getting log", True, r.status_code)
        return

    result = r.json()[-page_size:]
    logs = []
    for log in result:
        log["When"] = helper.format_time(data, log.get("When", ""))
        logs.append(log)
    if len(logs) == 0:
        common.log_output("No log entries found", True)
        return
    helper.output_dump(logs, output_type)
def __init_time_bar(self, duration, interval):
    pos = avg.Point2D(58, 0)
    size = avg.Point2D(self.width - pos.x - 10, 60)
    self.__time_bar = avg.DivNode(pos=pos, size=size, parent=self)

    avg.WordsNode(pos=(0, 0), color=global_values.COLOR_FOREGROUND,
                  fontsize=global_values.FONT_SIZE, text="Time range",
                  parent=self.__time_bar)

    self.__time_slider = custom_slider.IntervalScrollBar(
        pos=(0, 27), width=size.x, range=(0, duration),
        thumbExtent=duration, parent=self.__time_bar)
    self.__time_slider.subscribe(
        custom_slider.IntervalScrollBar.THUMB_POS_CHANGED, self.__on_scroll)

    self.__start_label = avg.WordsNode(
        pos=(0, 48), color=global_values.COLOR_FOREGROUND,
        text="0:00 ({})".format(helper.format_time(interval[0], False)),
        fontsize=global_values.FONT_SIZE, parent=self.__time_bar)
    self.__end_label = avg.WordsNode(
        pos=(size.x, 48), color=global_values.COLOR_FOREGROUND,
        text="({}) {}".format(helper.format_time(interval[1], False),
                              helper.format_time(self.__duration, False)),
        alignment="right", fontsize=global_values.FONT_SIZE,
        parent=self.__time_bar)

    self.__cur_time_line = avg.LineNode(
        color=global_values.COLOR_WHITE, sensitive=False,
        parent=self.__time_bar)
    self.__duration_time_label = avg.WordsNode(
        pos=(size.x, 0), color=global_values.COLOR_FOREGROUND,
        alignment="right", fontsize=global_values.FONT_SIZE,
        parent=self.__time_bar)
def follow_function(data, function, interval=5):
    try:
        while True:
            compatibility.clear_prompt()
            function()
            timestamp = helper.format_time(data, datetime.datetime.now())
            common.log_output(timestamp, True)
            common.log_output("Press control+C to quit", True)
            time.sleep(interval)
    except KeyboardInterrupt:
        return
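# Usage sketch (hypothetical wiring): redraw the newest log entries every
# 10 seconds until Ctrl+C. Assumes "data" is the same loaded-config dict
# the other helpers receive, and that "Information" and "json" are values
# get_live_logs accepts for level and output_type.
follow_function(data, lambda: get_live_logs(data, "Information", 20, 0, "json"),
                interval=10)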
def __show_users(self, time):
    set_timestamp = True
    helper.unlink_node_list(self.__user_nodes)
    self.__user_nodes = []

    for i, user in enumerate(self.__users):
        if not self._vis_params.get_user_visible(i):
            continue
        if (self.__mode == "user" and user.headInfoCount == 0) or \
                (self.__mode == "device" and user.deviceEntryInfoCount == 0):
            continue

        if self.__mode == "user":
            pos = user.getHeadPos(time)
            viewpt = (self._x_axis.value_to_pixel(user.getWallViewpoint(time).x),
                      self._y_axis.value_to_pixel(0))
            if set_timestamp:
                head_data = user.getHeadData(time)
                self.__timestamp_words_node.text = "{}\n{}".format(
                    datetime.datetime.fromtimestamp(head_data.time).strftime("%H:%M:%S.%f"),
                    helper.format_time(head_data.time - self.__session_start_time))
                set_timestamp = False
        elif self.__mode == "device":
            pos = user.getDeviceEntry(time).spacePos
            viewpt = (self._x_axis.value_to_pixel(user.getDeviceWallViewpoint(time).x),
                      self._y_axis.value_to_pixel(0))
            if set_timestamp:
                device_entry = user.getDeviceEntry(time)
                self.__timestamp_words_node.text = "{}\n{}".format(
                    datetime.datetime.fromtimestamp(device_entry.time).strftime("%H:%M:%S.%f"),
                    helper.format_time(device_entry.time - self.__session_start_time))
                set_timestamp = False
        else:
            return

        pixel_pos = avg.Point2D(self._x_axis.value_to_pixel(pos[0]),
                                self._y_axis.value_to_pixel(pos[2]))
        node = UserNode(user.getUserID(), pos=pixel_pos, viewpt=viewpt,
                        parent=self._data_div)
        self.__user_nodes.append(node)
def __update_time(self, vis_params):
    cur_time = vis_params.highlight_time
    line_x = (cur_time / self.__duration) * self.__time_slider.width
    self.__cur_time_line.pos1 = (line_x, 23)
    self.__cur_time_line.pos2 = (line_x, 50)

    # Check if something has changed.
    if self.__time_interval == vis_params.get_time_interval():
        return

    # Set the new interval.
    self.__time_interval = vis_params.get_time_interval()[:]
    self.__time_slider.setThumbExtent(self.__time_interval[1] - self.__time_interval[0])
    self.__time_slider.setThumbPos(self.__time_interval[0])
    self.__start_label.text = "0:00 ({})".format(
        helper.format_time(self.__time_interval[0], False))
    self.__end_label.text = "({}) {}".format(
        helper.format_time(self.__time_interval[1], False),
        helper.format_time(self.__duration, False))
    self.__duration_time_label.text = "Interval duration: " + helper.format_time(
        self.__time_interval[1] - self.__time_interval[0], False)
async def info(self, context):
    response = await context.send(embed=create_embed({
        'title': 'Loading bot info...',
        'color': discord.Color.gold()
    }))
    try:
        uptime = round(time.time() - self.uptime)
        uptime_text = format_time(uptime)
        ping = round(self.client.latency * 1000)
        invite_url = discord.utils.oauth_url(
            client_id=CLIENT_ID, permissions=discord.Permissions(8))

        connected_servers = 0
        members_watching = 0
        user_ids = []
        for guild in self.client.guilds:
            connected_servers += 1
            for member in guild.members:
                members_watching += 1
                if member.id not in user_ids:
                    user_ids.append(member.id)
        users_watching = len(user_ids)

        await response.edit(embed=create_embed({
            'title': 'Invite',
            'url': invite_url,
            'inline': True,
        }, {
            'Ping': f'{ping} ms',
            'Uptime': uptime_text,
            'Connected Servers': connected_servers,
            'Users Watching': members_watching,
            'Unique Users Watching': users_watching,
        }))
    except Exception as error_message:
        await response.edit(embed=create_embed({
            'title': 'Could not load bot info',
            'color': discord.Color.red()
        }, {'Error Message': error_message}))
        print('Cannot load bot info')
        print(error_message)
def __format_label(self, value):
    if self.__unit == "m":      # meters
        # cut zeros if value is an integer
        if value % 1 in (0, 0.0):
            value = int(value)
        else:
            value = round(value, 4)
        return "{} m".format(value)
    elif self.__unit == "s":    # seconds
        return helper.format_time(value)
    elif self.__unit == "px":   # pixels
        return ""
    elif self.__unit == "user":
        return ""
    elif self.__unit == "own":
        return self.__tick_labels[value]
    assert False
def __init__(self):
    cwd = os.getcwd()
    self.call_list = []
    self.msg_list = []
    for file in os.listdir(cwd + '/excel'):

        def open_excel(ending, dict_name):
            if file.endswith(ending):
                wb = xlrd.open_workbook('excel/' + file)
                sh = wb.sheet_by_index(0)
                for rownum in reversed(range(1, wb.sheet_by_index(0).nrows)):
                    dict_name.append(sh.row_values(rownum))

        open_excel('通信.xls', self.call_list)  # "通信" = call records
        open_excel('彩信.xls', self.msg_list)   # "彩信" = MMS records

    self.phone_list = set()
    self.msg_phone_list = set()
    phone_location = {}
    for call in self.call_list:
        # convert to seconds
        call[3] = format_time(call[3])
        self.phone_list.add(call[5])
        phone_location[call[5]] = call[7]
    for msg in self.msg_list:
        self.msg_phone_list.add(msg[3])

    self.month_list = []
    first = datetime.date.today().replace(day=1)
    i = 6
    while i > 0:
        self.month_list.append(first)
        first -= dateutil.relativedelta.relativedelta(months=1)
        i -= 1

    # msg location
    for msg in self.msg_list:
        # same number
        if msg[3] in phone_location:
            msg.append(phone_location[msg[3]])
        else:
            msg.append('')
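# The "convert to seconds" call above implies that format_time here parses a
# duration string. A minimal sketch consistent with that comment, assuming an
# "H:MM:SS" input; the project's real helper may accept other shapes.
def format_time(duration):
    hours, minutes, seconds = (int(part) for part in str(duration).split(':'))
    return hours * 3600 + minutes * 60 + seconds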
def notification_filter(data, json_input):
    notification_list = []
    for key in json_input:
        title = key.get("Title", "Notification")
        notification = {
            title: {
                "Backup ID": key.get("BackupID", ""),
                "Notification ID": key.get("ID", ""),
                "Message": key.get("Message", ""),
                "Type": key.get("Type", ""),
            }
        }
        timestamp = helper.format_time(data, key.get("Timestamp", ""))
        if timestamp is not None:
            notification[title]["Timestamp"] = timestamp
        notification_list.append(notification)
    return notification_list
def worker(gpu, ngpus_per_node, args):
    env_device, train_device = args_initialize(gpu, ngpus_per_node, args)
    train_env, test_env, observation = env_initialize(args, env_device)
    train_csv_file, train_csv_writer, eval_csv_file, eval_csv_writer, summary_writer = log_initialize(args, train_device)

    model = ActorCritic(args.num_stack, train_env.action_space, BasicBlock,
                        normalize=args.normalize, name=args.env_name)
    model, optimizer = model_initialize(args, model, train_device)

    if (args.num_ales % args.num_minibatches) != 0:
        raise ValueError('Number of ales ({}) is not evenly divisible by the number of minibatches ({})'
                         .format(args.num_ales, args.num_minibatches))

    if args.num_steps_per_update == -1:
        args.num_steps_per_update = args.num_steps

    minibatch_size = int(args.num_ales / args.num_minibatches)
    print("minibatch_size", minibatch_size)
    step0 = args.num_steps - args.num_steps_per_update
    n_minibatch = -1

    # This is the number of frames GENERATED between two updates.
    num_frames_per_iter = args.num_ales * args.num_steps_per_update
    # Total number of updates needed to reach t_max frames.
    total_steps = math.ceil(args.t_max / (args.world_size * num_frames_per_iter))

    shape = (args.num_steps + 1, args.num_ales, args.num_stack, *train_env.observation_space.shape[-2:])
    states = torch.zeros(shape, device=train_device, dtype=torch.float32)
    states[step0, :, -1] = observation.to(device=train_device, dtype=torch.float32)

    shape = (args.num_steps + 1, args.num_ales)
    values = torch.zeros(shape, device=train_device, dtype=torch.float32)
    logits = torch.zeros((args.num_steps + 1, args.num_ales, train_env.action_space.n),
                         device=train_device, dtype=torch.float32)
    returns = torch.zeros(shape, device=train_device, dtype=torch.float32)

    shape = (args.num_steps, args.num_ales)
    rewards = torch.zeros(shape, device=train_device, dtype=torch.float32)
    masks = torch.zeros(shape, device=train_device, dtype=torch.float32)
    actions = torch.zeros(shape, device=train_device, dtype=torch.long)
    actions_one_hot = torch.zeros((args.num_steps, args.num_ales, 18),
                                  device=train_device, dtype=torch.long)
    actions_space = torch.zeros(18, device=train_device, dtype=torch.long)

    # for the LSTM
    lstm_hidden_state = torch.zeros((args.num_steps + 1, args.num_ales, 256),
                                    device=train_device, dtype=torch.float32)

    mus = torch.ones(shape, device=train_device, dtype=torch.float32)
    # pis = torch.zeros(shape, device=train_device, dtype=torch.float32)
    rhos = torch.zeros((args.num_steps, minibatch_size),
                       device=train_device, dtype=torch.float32)

    # These variables are used to compute average rewards for all processes.
    episode_rewards = torch.zeros(args.num_ales, device=train_device, dtype=torch.float32)
    final_rewards = torch.zeros(args.num_ales, device=train_device, dtype=torch.float32)
    episode_lengths = torch.zeros(args.num_ales, device=train_device, dtype=torch.float32)
    final_lengths = torch.zeros(args.num_ales, device=train_device, dtype=torch.float32)

    # init replay memory
    # mem = ReplayMemory(observation.to(device=train_device, dtype=torch.float32), args, train_device)

    torch.cuda.synchronize()

    iterator = range(total_steps)
    if args.rank == 0:
        iterator = tqdm(iterator)

    total_time = 0
    evaluation_offset = 0
    opt_step = 0
    aux_task = False

    for update in iterator:
        T = args.world_size * update * num_frames_per_iter

        if (args.rank == 0) and (T >= evaluation_offset):
            print("===========evaluating=========")
            evaluation_offset += args.evaluation_interval
            torch.save(model.state_dict(), "./model_save")
            eval_lengths, eval_rewards = test(args, model, test_env)

            lmean, lmedian, lmin, lmax, lstd = gen_data(eval_lengths)
            rmean, rmedian, rmin, rmax, rstd = gen_data(eval_rewards)
            length_data = '(length) min/max/mean/median: {lmin:4.1f}/{lmax:4.1f}/{lmean:4.1f}/{lmedian:4.1f}'.format(
                lmin=lmin, lmax=lmax, lmean=lmean, lmedian=lmedian)
            reward_data = '(reward) min/max/mean/median: {rmin:4.1f}/{rmax:4.1f}/{rmean:4.1f}/{rmedian:4.1f}'.format(
                rmin=rmin, rmax=rmax, rmean=rmean, rmedian=rmedian)
            print('[training time: {}] {}'.format(
                format_time(total_time), ' --- '.join([length_data, reward_data])))

            if eval_csv_writer and eval_csv_file:
                eval_csv_writer.writerow([T, total_time, rmean, rmedian, rmin, rmax, rstd,
                                          lmean, lmedian, lmin, lmax, lstd])
                eval_csv_file.flush()

            if args.plot:
                summary_writer.add_scalar('eval/rewards_mean', rmean, T, walltime=total_time)
                summary_writer.add_scalar('eval/lengths_mean', lmean, T, walltime=total_time)

        start_time = time.time()

        with torch.no_grad():
            for step in range(args.num_steps_per_update):
                # nvtx.range_push('train:step')
                # value, logit = model(states[step0 + step])  # , lstm_hidden_state[step0 + step])
                value, logit, lstm_hidden_state[step0 + step] = model(
                    states[step0 + step], args, lstm_hidden_state[step0],
                    actions_one_hot[step], rewards[step])

                # store values and logits
                values[step0 + step] = value.squeeze(-1)

                # convert actions to numpy and perform next step
                probs = torch.clamp(F.softmax(logit, dim=1), min=0.00001, max=0.99999)
                probs_action = probs.multinomial(1).to(env_device)
                actions_space[probs_action] = 1
                torch.cuda.current_stream().synchronize()
                observation, reward, done, info = train_env.step(probs_action)
                observation = observation.squeeze(-1).unsqueeze(1)

                # move back to training memory
                observation = observation.to(device=train_device)
                reward = reward.to(device=train_device, dtype=torch.float32)
                done = done.to(device=train_device, dtype=torch.bool)
                probs_action = probs_action.to(device=train_device, dtype=torch.long)
                not_done = 1.0 - done.float()
                lstm_hidden_state[step0 + step] *= not_done[:, None]

                # update rewards and actions
                actions[step0 + step].copy_(probs_action.view(-1))
                actions_one_hot[step0 + step].copy_(actions_space)
                masks[step0 + step].copy_(not_done)
                rewards[step0 + step].copy_(reward.sign())
                # mus[step0 + step] = F.softmax(logit, dim=1).gather(1, actions[step0 + step].view(-1).unsqueeze(-1)).view(-1)
                mus[step0 + step] = torch.clamp(
                    F.softmax(logit, dim=1).gather(1, actions[step0 + step].view(-1).unsqueeze(-1)).view(-1),
                    min=0.00001, max=0.99999)

                # update next observations
                states[step0 + step + 1, :, :-1].copy_(states[step0 + step, :, 1:])
                states[step0 + step + 1] *= not_done.view(-1, *[1] * (observation.dim() - 1))
                states[step0 + step + 1, :, -1].copy_(observation.view(-1, *states.size()[-2:]))

                # update episodic reward counters
                episode_rewards += reward
                final_rewards[done] = episode_rewards[done]
                episode_rewards *= not_done

                episode_lengths += not_done
                final_lengths[done] = episode_lengths[done]
                episode_lengths *= not_done
                # nvtx.range_pop()

        # APPENDING observation (replay-memory experiments, currently disabled)
        # mem.append(Experience(obs=observation.to(device=train_device, dtype=torch.float32),
        #                       action=probs_action.view(-1).unsqueeze(1),
        #                       reward=reward.unsqueeze(1)))  # , done=done.unsqueeze(1)))
        # if opt_step > 100 and opt_step % 50 == 0:
        #     mem.clearMemory()  # clear half of memory every 50 steps

        n_minibatch = (n_minibatch + 1) % args.num_minibatches
        min_ale_index = int(n_minibatch * minibatch_size)
        max_ale_index = min_ale_index + minibatch_size

        nvtx.range_push('train:compute_values')
        value, logit, lstm_hidden_state[:, min_ale_index:max_ale_index] = model(
            states[:, min_ale_index:max_ale_index, :, :, :].contiguous().view(-1, *states.size()[-3:]),
            args,
            lstm_hidden_state[:, min_ale_index:max_ale_index].contiguous(),
            actions_one_hot[:, min_ale_index:max_ale_index, :].contiguous().view(-1, 18),
            rewards[:, min_ale_index:max_ale_index].contiguous().view(-1, 1))

        batch_value = value.detach().view((args.num_steps + 1, minibatch_size))
        batch_probs = F.softmax(logit.detach()[:(args.num_steps * minibatch_size), :], dim=1)
        batch_pis = batch_probs.gather(
            1, actions[:, min_ale_index:max_ale_index].contiguous().view(-1).unsqueeze(-1)
        ).view((args.num_steps, minibatch_size))
        returns[-1, min_ale_index:max_ale_index] = batch_value[-1]

        with torch.no_grad():
            # V-trace style backward recursion with clipped importance
            # weights c and rho (cf. IMPALA).
            for step in reversed(range(args.num_steps)):
                c = torch.clamp(batch_pis[step, :] / mus[step, min_ale_index:max_ale_index],
                                max=args.c_hat)
                rhos[step, :] = torch.clamp(batch_pis[step, :] / mus[step, min_ale_index:max_ale_index],
                                            max=args.rho_hat)
                delta_value = rhos[step, :] * (rewards[step, min_ale_index:max_ale_index] +
                                               (args.gamma * batch_value[step + 1] - batch_value[step]).squeeze())
                returns[step, min_ale_index:max_ale_index] = \
                    batch_value[step, :].squeeze() + delta_value + args.gamma * c * \
                    (returns[step + 1, min_ale_index:max_ale_index] - batch_value[step + 1, :].squeeze())

        value = value[:args.num_steps * minibatch_size, :]
        logit = logit[:args.num_steps * minibatch_size, :]

        log_probs = F.log_softmax(logit, dim=1)
        probs = F.softmax(logit, dim=1)

        action_log_probs = log_probs.gather(
            1, actions[:, min_ale_index:max_ale_index].contiguous().view(-1).unsqueeze(-1))
        dist_entropy = -(log_probs * probs).sum(-1).mean()

        advantages = returns[:-1, min_ale_index:max_ale_index].contiguous().view(-1).unsqueeze(-1) - value
        value_loss = advantages.pow(2).mean()
        policy_loss = -(action_log_probs * rhos.view(-1, 1).detach() *
                        (rewards[:, min_ale_index:max_ale_index].contiguous().view(-1, 1) + args.gamma *
                         returns[1:, min_ale_index:max_ale_index].contiguous().view(-1, 1) - value).detach()).mean()
        nvtx.range_pop()

        nvtx.range_push('train:backprop')
        # Auxiliary tasks from UNREAL (https://arxiv.org/pdf/1611.05397),
        # prototyped here and currently disabled:
        # REWARD PREDICTION
        # if opt_step > 100 and opt_step % 20 == 0:
        #     aux_task = True
        #     obs = [mem.rp() for _ in range(20)]
        #     states_, batch_rp_c = process_rp(obs)
        #     rp_c = model(states_, args, aux_task='rp') + 1e-7
        #     rp_loss = -torch.sum(batch_rp_c.to(device=train_device) * torch.log(rp_c)) / 20 / 3
        # PIXEL CHANGE
        #     obs_ = [mem.pc()]
        #     states_pc, batch_pc_a, batch_pc_R = process_pc(obs_, model, train_device)
        #     states_pc = torch.cat(states_pc).view(-1, 4, 84, 84)
        #     pc_q, pc_q_max = model(states_pc, aux_task='pc')
        #     pc_a_reshape = batch_pc_a[0].view(-1, train_env.action_space.n, 1, 1)
        #     pc_qa = torch.sum(torch.mul(pc_q, pc_a_reshape), dim=1, keepdim=False)
        #     pc_loss = torch.sum((batch_pc_R[0] - pc_qa) ** 2 / 2.)

        loss = value_loss * args.value_loss_coef + policy_loss - dist_entropy * args.entropy_coef
        # if aux_task:
        #     loss += rp_loss
        #     aux_task = False

        optimizer.zero_grad()
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward(retain_graph=True)
        master_params = amp.master_params(optimizer)
        torch.nn.utils.clip_grad_norm_(master_params, args.max_grad_norm)
        optimizer.step()
        opt_step += 1
        # nvtx.range_pop()

        # nvtx.range_push('train:next_states')
        for step in range(0, args.num_steps_per_update):
            states[:-1, :, :, :, :] = states[1:, :, :, :, :]
            rewards[:-1, :] = rewards[1:, :]
            actions[:-1, :] = actions[1:, :]
            # actions_one_hot[:-1, :] = actions_one_hot[1:, :]
            # lstm_hidden_state[:-1, :] = lstm_hidden_state[1:, :]
            masks[:-1, :] = masks[1:, :]
            mus[:-1, :] = mus[1:, :]
        # nvtx.range_pop()

        torch.cuda.synchronize()

        if args.rank == 0:
            iter_time = time.time() - start_time
            total_time += iter_time

            if args.plot:
                summary_writer.add_scalar('train/rewards_mean', final_rewards.mean().item(), T, walltime=total_time)
                summary_writer.add_scalar('train/lengths_mean', final_lengths.mean().item(), T, walltime=total_time)
                summary_writer.add_scalar('train/value_loss', value_loss, T, walltime=total_time)
                summary_writer.add_scalar('train/policy_loss', policy_loss, T, walltime=total_time)
                summary_writer.add_scalar('train/entropy', dist_entropy, T, walltime=total_time)

            progress_data = callback(args, model, T, iter_time, final_rewards, final_lengths,
                                     value_loss, policy_loss, dist_entropy,
                                     train_csv_writer, train_csv_file)
            iterator.set_postfix_str(progress_data)

    if args.plot and (args.rank == 0):
        # name = '{}.pth'.format("BO_PC")
        # torch.save(model.module.state_dict(), "Pong_1gpu_1200")
        summary_writer.close()
def worker(gpu, ngpus_per_node, args):
    args.gpu = gpu

    if args.distributed:
        args.seed += args.gpu
        torch.cuda.set_device(args.gpu)

        args.rank = int(os.environ['RANK']) if 'RANK' in os.environ else 0
        if args.multiprocessing_distributed:
            args.rank = args.rank * ngpus_per_node + args.gpu

        torch.distributed.init_process_group(backend='nccl',
                                             init_method='tcp://127.0.0.1:8632',
                                             world_size=args.world_size,
                                             rank=args.rank)
    else:
        args.rank = 0

    args.use_cuda_env = args.use_cuda_env and torch.cuda.is_available()
    args.no_cuda_train = not torch.cuda.is_available()
    args.verbose = args.verbose and (args.rank == 0)

    env_device = torch.device('cuda', args.gpu) if args.use_cuda_env else torch.device('cpu')
    train_device = torch.device('cuda', args.gpu) if not args.no_cuda_train else torch.device('cpu')

    # Setup
    np.random.seed(args.seed)
    torch.manual_seed(np.random.randint(1, 10000))
    if args.use_cuda_env or not args.no_cuda_train:
        torch.cuda.manual_seed(random.randint(1, 10000))

    if train_device.type == 'cuda':
        print('Train:\n' + cuda_device_str(train_device.index), flush=True)

    if args.use_openai:
        test_env = create_vectorize_atari_env(args.env_name, args.seed,
                                              args.evaluation_episodes,
                                              episode_life=False, clip_rewards=False)
        test_env.reset()
    else:
        test_env = AtariEnv(args.env_name, args.evaluation_episodes, color_mode='gray',
                            device='cpu', rescale=True, clip_rewards=False,
                            episodic_life=False, repeat_prob=0.0, frameskip=4)

    # Agent
    dqn = Agent(args, test_env.action_space)

    # Construct validation memory
    if args.rank == 0:
        print('Initializing evaluation memory with {} entries...'.format(args.evaluation_size),
              end='', flush=True)
        start_time = time.time()

    val_mem = initialize_validation(args, train_device)

    if args.rank == 0:
        print('complete ({})'.format(format_time(time.time() - start_time)), flush=True)

    if args.evaluate:
        dqn.eval()
        rewards, lengths, avg_Q = test(args, 0, dqn, val_mem, test_env, train_device)  # Test
    else:
        if args.rank == 0:
            print('Entering main training loop', flush=True)

            if args.output_filename:
                csv_file = open(args.output_filename, 'w', newline='')
                csv_file.write(json.dumps(vars(args)))
                csv_file.write('\n')
                csv_writer = csv.writer(csv_file, delimiter=',')
                csv_writer.writerow(['frames', 'total_time',
                                     'rmean', 'rmedian', 'rstd', 'rmin', 'rmax',
                                     'lmean', 'lmedian', 'lstd', 'lmin', 'lmax'])
            else:
                csv_writer, csv_file = None, None

            if args.plot:
                from tensorboardX import SummaryWriter
                current_time = datetime.now().strftime('%b%d_%H-%M-%S')
                log_dir = os.path.join(args.log_dir, current_time + '_' + socket.gethostname())
                writer = SummaryWriter(log_dir=log_dir)
                for k, v in vars(args).items():
                    writer.add_text(k, str(v))

            # Environment
            print('Initializing environments...', end='', flush=True)
            start_time = time.time()

        if args.use_openai:
            train_env = create_vectorize_atari_env(args.env_name, args.seed, args.num_ales,
                                                   episode_life=True,
                                                   clip_rewards=args.reward_clip,
                                                   max_frames=args.max_episode_length)
            observation = torch.from_numpy(train_env.reset()).squeeze(1)
        else:
            train_env = AtariEnv(args.env_name, args.num_ales, color_mode='gray',
                                 device=env_device, rescale=True,
                                 clip_rewards=args.reward_clip,
                                 episodic_life=True, repeat_prob=0.0)
            train_env.train()
            observation = train_env.reset(initial_steps=args.ale_start_steps,
                                          verbose=args.verbose).clone().squeeze(-1)

        if args.rank == 0:
            print('complete ({})'.format(format_time(time.time() - start_time)), flush=True)

        # These variables are used to compute average rewards for all processes.
        episode_rewards = torch.zeros(args.num_ales, device=train_device, dtype=torch.float32)
        episode_lengths = torch.zeros(args.num_ales, device=train_device, dtype=torch.float32)
        final_rewards = torch.zeros(args.num_ales, device=train_device, dtype=torch.float32)
        final_lengths = torch.zeros(args.num_ales, device=train_device, dtype=torch.float32)
        has_completed = torch.zeros(args.num_ales, device=train_device, dtype=torch.uint8)

        mem = ReplayMemory(args, args.memory_capacity, train_device)
        mem.reset(observation)
        priority_weight_increase = (1 - args.priority_weight) / (args.t_max - args.learn_start)

        state = torch.zeros((args.num_ales, args.history_length, 84, 84),
                            device=mem.device, dtype=torch.float32)
        state[:, -1] = observation.to(device=mem.device, dtype=torch.float32).div(255.0)

        num_frames_per_iter = args.num_ales
        total_steps = math.ceil(args.t_max / (args.world_size * num_frames_per_iter))
        epsilons = np.linspace(args.epsilon_start, args.epsilon_final,
                               math.ceil(args.epsilon_frames / num_frames_per_iter))
        epsilon_offset = math.ceil(args.learn_start / num_frames_per_iter)

        prefetcher = data_prefetcher(args.batch_size, train_device, mem)

        avg_loss = 'N/A'
        eval_offset = 0
        target_update_offset = 0

        total_time = 0
        env_time = 0
        mem_time = 0
        net_time = 0

        fps_steps = 0
        fps_start_time = time.time()

        # main loop
        iterator = range(total_steps)
        if args.rank == 0:
            iterator = tqdm(iterator)

        env_stream = torch.cuda.Stream()
        train_stream = torch.cuda.Stream()

        for update in iterator:
            T = args.world_size * update * num_frames_per_iter
            epsilon = epsilons[min(update - epsilon_offset, len(epsilons) - 1)] \
                if T >= args.learn_start else epsilons[0]

            start_time = time.time()

            if update % args.replay_frequency == 0:
                dqn.reset_noise()  # Draw a new set of noisy weights

            dqn.eval()
            nvtx.range_push('train:select action')
            if args.noisy_linear:
                action = dqn.act(state)  # Choose an action greedily (with noisy weights)
            else:
                action = dqn.act_e_greedy(state, epsilon=epsilon)
            nvtx.range_pop()
            dqn.train()
            fps_steps += 1

            if args.use_openai:
                action = action.cpu().numpy()

            torch.cuda.synchronize()

            with torch.cuda.stream(env_stream):
                nvtx.range_push('train:env step')
                if args.use_openai:
                    observation, reward, done, info = train_env.step(action)  # Step
                    # convert back to pytorch tensors
                    observation = torch.from_numpy(observation).squeeze(1)
                    reward = torch.from_numpy(reward.astype(np.float32))
                    done = torch.from_numpy(done.astype(np.uint8))
                    action = torch.from_numpy(action)
                else:
                    observation, reward, done, info = train_env.step(action, asyn=True)  # Step
                    observation = observation.clone().squeeze(-1)
                nvtx.range_pop()

                observation = observation.to(device=train_device)
                reward = reward.to(device=train_device)
                done = done.to(device=train_device)
                action = action.to(device=train_device)

            delta = time.time() - start_time
            env_time += delta
            total_time += delta

            observation = observation.float().div_(255.0)
            state[:, :-1].copy_(state[:, 1:].clone())
            state *= (1 - done).view(-1, 1, 1, 1).float()
            state[:, -1].copy_(observation)

            # update episodic reward counters
            not_done = (1 - done).float()
            has_completed |= (done == 1)

            episode_rewards += reward.float()
            final_rewards[done] = episode_rewards[done]
            episode_rewards *= not_done

            episode_lengths += not_done
            final_lengths[done] = episode_lengths[done]
            episode_lengths *= not_done

            # Train and test
            if T >= args.learn_start:
                # Anneal importance sampling weight β to 1
                mem.priority_weight = min(mem.priority_weight + priority_weight_increase, 1)

                prefetcher.preload()

                avg_loss = 0.0
                num_minibatches = min(int(args.num_ales / args.replay_frequency), 8)
                for _ in range(num_minibatches):
                    # Sample transitions
                    start_time = time.time()
                    nvtx.range_push('train:sample states')
                    idxs, states, actions, returns, next_states, nonterminals, weights = prefetcher.next()
                    nvtx.range_pop()
                    delta = time.time() - start_time
                    mem_time += delta
                    total_time += delta

                    start_time = time.time()
                    nvtx.range_push('train:network update')
                    loss = dqn.learn(states, actions, returns, next_states, nonterminals, weights)
                    nvtx.range_pop()
                    delta = time.time() - start_time
                    net_time += delta
                    total_time += delta

                    start_time = time.time()
                    nvtx.range_push('train:update priorities')
                    mem.update_priorities(idxs, loss)  # Update priorities of sampled transitions
                    nvtx.range_pop()
                    delta = time.time() - start_time
                    mem_time += delta
                    total_time += delta

                    avg_loss += loss.mean().item()
                avg_loss /= num_minibatches

                # Update target network
                if T >= target_update_offset:
                    dqn.update_target_net()
                    target_update_offset += args.target_update

            torch.cuda.current_stream().wait_stream(env_stream)
            torch.cuda.current_stream().wait_stream(train_stream)

            start_time = time.time()
            nvtx.range_push('train:append memory')
            mem.append(observation, action, reward, done)  # Append transition to memory
            nvtx.range_pop()
            delta = time.time() - start_time
            mem_time += delta
            total_time += delta

            fps_end_time = time.time()
            fps = (args.world_size * fps_steps * args.num_ales) / (fps_end_time - fps_start_time)
            fps_start_time = fps_end_time
            fps_steps = 0

            if args.rank == 0:
                if args.plot and ((update % args.replay_frequency) == 0):
                    writer.add_scalar('train/epsilon', epsilon, T)
                    writer.add_scalar('train/rewards', final_rewards.mean(), T)
                    writer.add_scalar('train/lengths', final_lengths.mean(), T)

                if T >= eval_offset:
                    eval_start_time = time.time()
                    dqn.eval()  # Set DQN (online network) to evaluation mode
                    rewards, lengths, avg_Q = test(args, T, dqn, val_mem, test_env, train_device)
                    dqn.train()  # Set DQN (online network) back to training mode
                    eval_total_time = time.time() - eval_start_time
                    eval_offset += args.evaluation_interval

                    rmean, rmedian, rstd, rmin, rmax = vec_stats(rewards)
                    lmean, lmedian, lstd, lmin, lmax = vec_stats(lengths)

                    print('reward: {:4.2f}, {:4.0f}, {:4.0f}, {:4.4f} | '
                          'length: {:4.2f}, {:4.0f}, {:4.0f}, {:4.4f} | '
                          'Avg. Q: {:4.4f} | {} | Overall FPS: {:4.2f}'.format(
                              rmean, rmin, rmax, rstd, lmean, lmin, lmax, lstd,
                              avg_Q, format_time(eval_total_time), fps), flush=True)

                    if args.output_filename and csv_writer and csv_file:
                        csv_writer.writerow([T, total_time, rmean, rmedian, rstd, rmin, rmax,
                                             lmean, lmedian, lstd, lmin, lmax])
                        csv_file.flush()

                    if args.plot:
                        writer.add_scalar('eval/rewards', rmean, T)
                        writer.add_scalar('eval/lengths', lmean, T)
                        writer.add_scalar('eval/avg_Q', avg_Q, T)

                loss_str = '{:4.4f}'.format(avg_loss) if isinstance(avg_loss, float) else avg_loss
                progress_data = 'T = {:,} epsilon = {:4.2f} avg reward = {:4.2f} loss: {} ({:4.2f}% net, {:4.2f}% mem, {:4.2f}% env)' \
                    .format(T, epsilon, final_rewards.mean().item(), loss_str,
                            *percent_time(total_time, net_time, mem_time, env_time))
                iterator.set_postfix_str(progress_data)

    if args.plot and (args.rank == 0):
        writer.close()

    if args.use_openai:
        train_env.close()
        test_env.close()
def backup_filter(data, json_input):
    backup_list = []
    for key in json_input:
        backup = key.pop("Backup", {})
        metadata = backup.pop("Metadata", {})
        backup_name = backup.pop("Name", "")
        backup = {
            "ID": backup.get("ID", ""),
            "Local database": backup.get("DBPath", ""),
        }
        backup["Versions"] = int(metadata.get("BackupListCount", 0))
        backup["Last run"] = {
            "Duration": helper.format_duration(metadata.get("LastBackupDuration", "0")),
            "Started": helper.format_time(data, metadata.get("LastBackupStarted", "0")),
            "Stopped": helper.format_time(data, metadata.get("LastBackupFinished", "0")),
        }
        backup["Size"] = {
            "Local": metadata.get("SourceSizeString", ""),
            "Backend": metadata.get("TargetSizeString", ""),
        }

        schedule = key.get("Schedule", None)
        if schedule is not None:
            next_run = helper.format_time(data, schedule.pop("Time", ""))
            if next_run is not None:
                schedule["Next run"] = next_run
            last_run = helper.format_time(data, schedule.pop("LastRun", ""))
            if last_run is not None:
                schedule["Last run"] = last_run
            schedule.pop("AllowedDays", None)
            schedule.pop("ID", None)
            schedule.pop("Rule", None)
            schedule.pop("Tags", None)
        backup["Schedule"] = schedule

        progress_state = key.get("Progress", None)
        if progress_state is not None:
            state = progress_state.get("Phase", None)
            speed = progress_state.get("BackendSpeed", 0)
            progress = {
                "State": state,
                "Counting files": progress_state.get("StillCounting", False),
                "Backend": {
                    "Action": progress_state.get("BackendAction", 0)
                },
                "Task ID": progress_state.get("TaskID", -1),
            }
            if speed > 0:
                readable_speed = helper.format_bytes(speed) + "/s"
                progress["Backend"]["Speed"] = readable_speed

            # Display item only if relevant
            if not progress_state.get("StillCounting", False):
                progress.pop("Counting files")

            # Avoid division by zero
            file_count = progress_state.get("ProcessedFileCount", 0)
            total_file_count = progress_state.get("TotalFileCount", 0)
            processing = state == "Backup_ProcessingFiles"
            if file_count > 0 and total_file_count > 0 and processing:
                processed = "{0:.2f}".format(file_count / total_file_count * 100)
                progress["Processed files"] = processed + "%"

            # Avoid division by zero
            data_size = progress_state.get("ProcessedFileSize", 0)
            total_data_size = progress_state.get("TotalFileSize", 0)
            processing = state == "Backup_ProcessingFiles"
            if data_size > 0 and total_data_size > 0 and processing:
                # Calculate percentage
                processed = "{0:.2f}".format(data_size / total_data_size * 100)
                # Format text "x% (y GB of z GB)"
                processed += "% (" + str(helper.format_bytes(data_size))
                processed += " of "
                processed += str(helper.format_bytes(total_data_size)) + ")"
                progress["Processed data"] = processed

            # Avoid division by zero
            current = progress_state.get("BackendFileProgress", 0)
            total = progress_state.get("BackendFileSize", 0)
            if current > 0 and total > 0:
                backend_progress = "{0:.2f}".format(current / total * 100)
                progress["Backend"]["Progress"] = backend_progress + "%"

            backup["Progress"] = progress

        key = {backup_name: backup}
        backup_list.append(key)

    return backup_list
def list_filter(data, json_input, resource):
    resource_list = []
    if resource == "backups":
        for key in json_input:
            backup = key.get("Backup", None)
            schedule = key.get("Schedule", None)
            progress_state = key.get("Progress", None)
            backup_name = backup.get("Name", "")
            # Grab the metadata before "backup" is rebound to the output
            # dict below; looking it up afterwards always misses.
            metadata = backup.get("Metadata", {})
            backup = {
                backup_name: {
                    "ID": backup.get("ID", ""),
                }
            }
            if metadata.get("SourceSizeString") is not None:
                backup[backup_name]["Source size"] = metadata.get("SourceSizeString")
            if schedule is not None:
                next_run = helper.format_time(data, schedule.get("Time", ""))
                if next_run is not None:
                    backup[backup_name]["Next run"] = next_run
                last_run = helper.format_time(data, schedule.get("LastRun", ""))
                if last_run is not None:
                    backup[backup_name]["Last run"] = last_run
            if progress_state is not None:
                backup[backup_name]["Running"] = {
                    "Task ID": progress_state.get("TaskID", None),
                    "State": progress_state.get("Phase", None),
                }
            resource_list.append(backup)
    elif resource == "notifications":
        for val in json_input:
            notification = {
                val.get("Title", ""): {
                    "Backup ID": val.get("BackupID", ""),
                    "Notification ID": val.get("ID", ""),
                }
            }
            timestamp = helper.format_time(data, val.get("Timestamp", ""))
            if timestamp is not None:
                notification["Timestamp"] = timestamp
            resource_list.append(notification)
    elif resource == "serversettings":
        hidden_values = [
            "update-check-latest",
            "last-update-check",
            "is-first-run",
            "update-check-interval",
            "server-passphrase",
            "server-passphrase-salt",
            "server-passphrase-trayicon",
            "server-passphrase-trayicon-hash",
            "unacked-error",
            "unacked-warning",
            "has-fixed-invalid-backup-id",
        ]
        for key, value in json_input.items():
            if key in hidden_values:
                continue
            setting = {key: {"value": value}}
            resource_list.append(setting)
    else:
        resource_list = json_input
    return resource_list
torch.cuda.manual_seed_all(seed_val)

# Store the average loss after each epoch so we can plot them.
loss_values = []

model.zero_grad()

for epoch_i in range(0, epochs):
    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    t0 = time.time()
    total_loss = 0
    model.train()

    for step, batch in enumerate(train_dataloader):
        if step % 40 == 0 and step != 0:
            # logging
            elapsed = format_time(time.time() - t0)
            print('  Batch {:>5,} of {:>5,}. Elapsed: {:}.'.format(
                step, len(train_dataloader), elapsed))

        model.train()
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)
        b_categories = batch[3].to(device)

        outputs = model(b_input_ids,
                        token_type_ids=None,
                        attention_mask=b_input_mask,
                        labels=b_labels)
        loss = outputs[0]
        total_loss += loss.item()
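# A plausible implementation of the format_time used for the "Elapsed" log
# line above, assuming it renders a float count of seconds as hh:mm:ss; this
# is an illustrative sketch, not necessarily the author's exact helper.
import datetime

def format_time(elapsed):
    # Round to whole seconds, then let timedelta handle hh:mm:ss rendering.
    return str(datetime.timedelta(seconds=int(round(elapsed))))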
def worker(gpu, ngpus_per_node, args):
    env_device, train_device = args_initialize(gpu, ngpus_per_node, args)
    train_env, test_env, observation = env_initialize(args, env_device)
    train_csv_file, train_csv_writer, eval_csv_file, eval_csv_writer, summary_writer = log_initialize(args, train_device)

    model = ActorCritic(args.num_stack, train_env.action_space,
                        normalize=args.normalize, name=args.env_name)
    model, optimizer = model_initialize(args, model, train_device)

    num_frames_per_iter = args.num_ales * args.num_steps
    total_steps = math.ceil(args.t_max / (args.world_size * num_frames_per_iter))

    shape = (args.num_steps + 1, args.num_ales, args.num_stack, *train_env.observation_space.shape[-2:])
    states = torch.zeros(shape, device=train_device, dtype=torch.float32)
    states[0, :, -1] = observation.to(device=train_device, dtype=torch.float32)

    shape = (args.num_steps + 1, args.num_ales)
    values = torch.zeros(shape, device=train_device, dtype=torch.float32)
    returns = torch.zeros(shape, device=train_device, dtype=torch.float32)

    shape = (args.num_steps, args.num_ales)
    rewards = torch.zeros(shape, device=train_device, dtype=torch.float32)
    masks = torch.zeros(shape, device=train_device, dtype=torch.float32)
    actions = torch.zeros(shape, device=train_device, dtype=torch.long)

    # These variables are used to compute average rewards for all processes.
    episode_rewards = torch.zeros(args.num_ales, device=train_device, dtype=torch.float32)
    final_rewards = torch.zeros(args.num_ales, device=train_device, dtype=torch.float32)
    episode_lengths = torch.zeros(args.num_ales, device=train_device, dtype=torch.float32)
    final_lengths = torch.zeros(args.num_ales, device=train_device, dtype=torch.float32)

    if args.use_gae:
        gae = torch.zeros(args.num_ales, device=train_device, dtype=torch.float32)

    maybe_npy = lambda a: a.numpy() if args.use_openai else a

    torch.cuda.synchronize()

    iterator = range(total_steps)
    if args.rank == 0:
        iterator = tqdm(iterator)

    total_time = 0
    evaluation_offset = 0

    for update in iterator:
        T = args.world_size * update * num_frames_per_iter

        if (args.rank == 0) and (T >= evaluation_offset):
            evaluation_offset += args.evaluation_interval
            eval_lengths, eval_rewards = test(args, model, test_env)

            lmean, lmedian, lmin, lmax, lstd = gen_data(eval_lengths)
            rmean, rmedian, rmin, rmax, rstd = gen_data(eval_rewards)
            length_data = '(length) min/max/mean/median: {lmin:4.1f}/{lmax:4.1f}/{lmean:4.1f}/{lmedian:4.1f}'.format(
                lmin=lmin, lmax=lmax, lmean=lmean, lmedian=lmedian)
            reward_data = '(reward) min/max/mean/median: {rmin:4.1f}/{rmax:4.1f}/{rmean:4.1f}/{rmedian:4.1f}'.format(
                rmin=rmin, rmax=rmax, rmean=rmean, rmedian=rmedian)
            print('[training time: {}] {}'.format(
                format_time(total_time), ' --- '.join([length_data, reward_data])))

            if eval_csv_writer and eval_csv_file:
                eval_csv_writer.writerow([T, total_time, rmean, rmedian, rmin, rmax, rstd,
                                          lmean, lmedian, lmin, lmax, lstd])
                eval_csv_file.flush()

            if args.plot:
                summary_writer.add_scalar('eval/rewards_mean', rmean, T, walltime=total_time)
                summary_writer.add_scalar('eval/lengths_mean', lmean, T, walltime=total_time)

        start_time = time.time()

        with torch.no_grad():
            for step in range(args.num_steps):
                value, logit = model(states[step])

                # store values
                values[step] = value.squeeze(-1)

                # convert actions to numpy and perform next step
                probs_action = F.softmax(logit, dim=1).multinomial(1).to(env_device)
                observation, reward, done, info = train_env.step(maybe_npy(probs_action))

                if args.use_openai:
                    # convert back to pytorch tensors
                    observation = torch.from_numpy(observation)
                    reward = torch.from_numpy(reward)
                    done = torch.from_numpy(done.astype(np.uint8))
                else:
                    observation = observation.squeeze(-1).unsqueeze(1)

                # move back to training memory
                observation = observation.to(device=train_device)
                reward = reward.to(device=train_device, dtype=torch.float32)
                done = done.to(device=train_device, dtype=torch.bool)
                probs_action = probs_action.to(device=train_device, dtype=torch.long)

                not_done = 1.0 - done.float()

                # update rewards and actions
                actions[step].copy_(probs_action.view(-1))
                masks[step].copy_(not_done)
                rewards[step].copy_(reward.sign())

                # update next observations
                states[step + 1, :, :-1].copy_(states[step, :, 1:].clone())
                states[step + 1] *= not_done.view(-1, *[1] * (observation.dim() - 1))
                states[step + 1, :, -1].copy_(observation.view(-1, *states.size()[-2:]))

                # update episodic reward counters
                episode_rewards += reward
                final_rewards[done] = episode_rewards[done]
                episode_rewards *= not_done

                episode_lengths += not_done
                final_lengths[done] = episode_lengths[done]
                episode_lengths *= not_done

        returns[-1] = values[-1] = model(states[-1])[0].data.squeeze(-1)

        if args.use_gae:
            gae.zero_()
            for step in reversed(range(args.num_steps)):
                delta = rewards[step] + (args.gamma * values[step + 1] * masks[step]) - values[step]
                gae = delta + (args.gamma * args.tau * masks[step] * gae)
                returns[step] = gae + values[step]
        else:
            for step in reversed(range(args.num_steps)):
                returns[step] = rewards[step] + (args.gamma * returns[step + 1] * masks[step])

        value, logit = model(states[:-1].view(-1, *states.size()[-3:]))

        log_probs = F.log_softmax(logit, dim=1)
        probs = F.softmax(logit, dim=1)

        action_log_probs = log_probs.gather(1, actions.view(-1).unsqueeze(-1))
        dist_entropy = -(log_probs * probs).sum(-1).mean()

        advantages = returns[:-1].view(-1).unsqueeze(-1) - value
        value_loss = advantages.pow(2).mean()
        policy_loss = -(advantages.clone().detach() * action_log_probs).mean()

        loss = value_loss * args.value_loss_coef + policy_loss - dist_entropy * args.entropy_coef
        optimizer.zero_grad()

        if args.cpu_train:
            loss.backward()
            master_params = model.parameters()
        else:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            master_params = amp.master_params(optimizer)

        torch.nn.utils.clip_grad_norm_(master_params, args.max_grad_norm)
        optimizer.step()

        states[0].copy_(states[-1])

        torch.cuda.synchronize()

        if args.rank == 0:
            iter_time = time.time() - start_time
            total_time += iter_time

            if args.plot:
                summary_writer.add_scalar('train/rewards_mean', final_rewards.mean().item(), T, walltime=total_time)
                summary_writer.add_scalar('train/lengths_mean', final_lengths.mean().item(), T, walltime=total_time)
                summary_writer.add_scalar('train/value_loss', value_loss, T, walltime=total_time)
                summary_writer.add_scalar('train/policy_loss', policy_loss, T, walltime=total_time)
                summary_writer.add_scalar('train/entropy', dist_entropy, T, walltime=total_time)

            progress_data = callback(args, model, T, iter_time, final_rewards, final_lengths,
                                     value_loss.item(), policy_loss.item(), dist_entropy.item(),
                                     train_csv_writer, train_csv_file)
            iterator.set_postfix_str(progress_data)

    if args.plot and (args.rank == 0):
        summary_writer.close()

    if args.use_openai:
        train_env.close()
    if args.use_openai_test_env:
        test_env.close()
def test_format_time(self):
    test_time_1 = helper.format_time([u'12:00', u'AM'])
    test_time_2 = helper.format_time([u'2:00', u'PM'])
    self.assertEqual(test_time_1, '00:00')
    self.assertEqual(test_time_2, '14:00')
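# A minimal format_time sketch that satisfies the assertions above, assuming
# the input is a [clock, meridiem] pair such as [u'2:00', u'PM']; the real
# helper under test may handle more formats.
def format_time(parts):
    clock, meridiem = parts
    hour, minute = (int(piece) for piece in clock.split(':'))
    if meridiem == 'AM' and hour == 12:
        hour = 0        # 12:xx AM maps to 00:xx
    elif meridiem == 'PM' and hour != 12:
        hour += 12      # 1:xx-11:xx PM shifts by twelve hours
    return '{:02d}:{:02d}'.format(hour, minute)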