Example #1
def display_status(data):
    message = "Application version: " + config.APPLICATION_VERSION
    common.log_output(message, True)

    message = "Config file: " + config.CONFIG_FILE
    common.log_output(message, True)

    if data.get("parameters_file", None) is not None:
        param_file = data.get("parameters_file", "")
        message = "Params file: " + param_file
        common.log_output(message, True)

    token = data.get("token", None)
    token_expires = data.get("token_expires", None)
    if token is None or token_expires is None:
        common.log_output("Not logged in", True)
        sys.exit(2)

    if data.get("last_login", None) is not None:
        last_login = data.get("last_login", "")
        message = "Logged in  : " + helper.format_time(last_login)
        common.log_output(message, True)

    if token_expires is not None:
        message = "Expiration : " + helper.format_time(token_expires)
        common.log_output(message, True)
Example #2
def display_status(data):
    message = "Server       : " + common.create_baseurl(data)
    common.log_output(message, True)

    server_activity, backup_id = fetch_progress_state(data)
    message = "Server status: "
    if server_activity.get("OverallProgress", 1) != 1:
        message += server_activity.get("Phase", None)
        message += " on backup " + backup_id
    else:
        message += "Idle"
    common.log_output(message, True)

    message = "Config file  : " + config.CONFIG_FILE
    common.log_output(message, True)

    if data.get("parameters_file", None) is not None:
        param_file = data.get("parameters_file", "")
        message = "Params file  : " + param_file
        common.log_output(message, True)

    token = data.get("token", None)
    token_expires = data.get("token_expires", None)
    if token is None or token_expires is None:
        common.log_output("Not logged in", True)
        sys.exit(2)

    if data.get("last_login", None) is not None:
        last_login = data.get("last_login", "")
        message = "Logged in    : " + helper.format_time(last_login)
        common.log_output(message, True)

    if token_expires is not None:
        message = "Expiration   : " + helper.format_time(token_expires)
        common.log_output(message, True)
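Neither example shows `helper.format_time` itself. As a rough, hypothetical sketch (the real helper in these projects may differ and some call sites pass an extra `data` argument), a compatible one-argument version that returns None on unparsable input, matching how several callers above guard with `is not None`, could look like this:

import datetime

def format_time(timestamp):
    # Hypothetical sketch, not the project's actual helper: parse an
    # ISO-8601 timestamp and return a readable string, or None on failure
    # so callers can simply skip the field.
    try:
        parsed = datetime.datetime.fromisoformat(str(timestamp))
    except ValueError:
        return None
    return parsed.strftime("%Y-%m-%d %H:%M:%S")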
Example #3
def list_filter(json_input, resource):
    resource_list = []
    if resource == "backups":
        for key in json_input:
            backup = key.get("Backup", None)
            schedule = key.get("Schedule", None)
            progress_state = key.get("Progress", None)
            backup_name = backup.get("Name", "")
            metadata = backup.get("Metadata", {})
            backup = {
                backup_name: {
                    "ID": backup.get("ID", ""),
                }
            }

            # Read the size from the original backup metadata, captured before
            # "backup" was rebound to the output dict above.
            if metadata.get("SourceSizeString") is not None:
                backup[backup_name]["Source size"] = metadata.get("SourceSizeString")

            if schedule is not None:
                next_run = helper.format_time(schedule.get("Time", ""))
                if next_run is not None:
                    backup[backup_name]["Next run"] = next_run

                last_run = helper.format_time(schedule.get("LastRun", ""))
                if last_run is not None:
                    backup[backup_name]["Last run"] = last_run

            if progress_state is not None:
                backup[backup_name]["Running"] = {
                    "Task ID": progress_state.get("TaskID", None),
                    "State": progress_state.get("Phase", None),
                }

            resource_list.append(backup)

    elif resource == "notifications":
        for val in json_input:
            notification = {
                val.get("Title", ""): {
                    "Backup ID": val.get("BackupID", ""),
                    "Notification ID": val.get("ID", ""),
                }
            }
            timestamp = helper.format_time(val.get("Timestamp", ""))
            if timestamp is not None:
                notification["Timestamp"] = timestamp

            resource_list.append(notification)

    elif resource == "serversettings":
        for key, value in json_input.items():
            if key == "update-check-latest":
                continue
            setting = {key: {"value": value}}

            resource_list.append(setting)
    else:
        resource_list = json_input

    return resource_list
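A hypothetical call site for `list_filter` (not shown in the source), paired with the `helper.output_dump` helper that appears in Example #6:

# Hypothetical usage: "response" and "output_type" are assumed to come
# from the surrounding CLI code, which is not part of this example.
backups = list_filter(response.json(), "backups")
helper.output_dump(backups, output_type)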
Example #4
    def ui_update_recipe(self):

        if self.cook.command is not None and self.cook.is_running:
            if self.cook.message:
                self.ui_message.set_text(self.cook.message)
            else:
                self.ui_message.set_text(self.cook.name)

            self.ui_operation.set_text(self.cook.command.loc_name)

            args = u""
            for key, value in self.cook.command.args.iteritems():
                args = args + key + ":" + unicode(value) + u" , "
            if args:
                args = args[:-3]

            self.ui_operation_args.set_text(args)

            self.ui_time1.set_text(helper.format_time(self.cook.command_time))
            self.ui_time2.set_text(
                helper.format_time(self.cook.command_total_time)
                if self.cook.command_total_time else "-")
            self.ui_time3.set_text(helper.format_time(self.cook.time))
            self.ui_time4.set_text(
                helper.format_time(self.cook.total_time)
                if self.cook.total_time else "-")

            self.ui_progress.set_fraction(self.cook.time /
                                          self.cook.total_time)

            self.ui_operation_state.set_text(cook.State.repr(self.cook.state))
            self.ui_status.set_text(cook.State.repr(self.cook.state))
        else:
            self.ui_message.set_text(self.cook.name)

            self.ui_operation.set_text("-")
            self.ui_operation_args.set_text("")

            self.ui_time1.set_text("")
            self.ui_time2.set_text("")
            self.ui_time3.set_text("")
            self.ui_time4.set_text("")

            self.ui_progress.set_fraction(0.0)

            self.ui_operation_state.set_text("")
            self.ui_status.set_text("")

        self.ui_freq1.set_text("%.1f" %
                               (self.cook.device.cp_freq_current / 100.0))
        self.ui_freq2.set_text("%.1f" %
                               (self.cook.device.cp_freq_current / 100.0))
        self.ui_par.set_text("")
        self.ui_direction.set_text(
            cp2000.Direction.repr(self.cook.device.direction))
Example #5
    def ui_update_recipe(self):

        if self.cook.command is not None and self.cook.is_running:
            if self.cook.message:
                self.ui_message.set_text(self.cook.message)
            else:
                self.ui_message.set_text(self.cook.name)

            self.ui_operation.set_text(self.cook.command.loc_name)

            args = u""
            for key, value in self.cook.command.args.iteritems():
                args = args + key + ":" + unicode(value) + u" , "
            if args:
                args = args[:-3]

            self.ui_operation_args.set_text(args)

            self.ui_time1.set_text(helper.format_time(self.cook.command_time))
            self.ui_time2.set_text(helper.format_time(self.cook.command_total_time)
                                   if self.cook.command_total_time else "-")
            self.ui_time3.set_text(helper.format_time(self.cook.time))
            self.ui_time4.set_text(helper.format_time(self.cook.total_time)
                                   if self.cook.total_time else "-")

            self.ui_progress.set_fraction(self.cook.time / self.cook.total_time)

            self.ui_operation_state.set_text(cook.State.repr(self.cook.state))
            self.ui_status.set_text(cook.State.repr(self.cook.state))
        else:
            self.ui_message.set_text(self.cook.name)

            self.ui_operation.set_text("-")
            self.ui_operation_args.set_text("")

            self.ui_time1.set_text("")
            self.ui_time2.set_text("")
            self.ui_time3.set_text("")
            self.ui_time4.set_text("")

            self.ui_progress.set_fraction(0.0)

            self.ui_operation_state.set_text("")
            self.ui_status.set_text("")

        self.ui_freq1.set_text("%.1f" % (self.cook.device.cp_freq_current/100.0))
        self.ui_freq2.set_text("%.1f" % (self.cook.device.cp_freq_current/100.0))
        self.ui_par.set_text("")
        self.ui_direction.set_text(cp2000.Direction.repr(self.cook.device.direction))
Example #6
def get_live_logs(data, level, page_size, first_id, output_type):
    baseurl = common.create_baseurl(data, "/api/v1/logdata/poll")
    cookies = common.create_cookies(data)
    headers = common.create_headers(data)
    verify = data.get("server", {}).get("verify", True)
    params = {'level': level, 'id': first_id, 'pagesize': page_size}

    r = requests.get(baseurl,
                     headers=headers,
                     cookies=cookies,
                     params=params,
                     verify=verify)
    common.check_response(data, r.status_code)
    if r.status_code == 500:
        message = "Error getting log, "
        message += "database may be locked by backup"
        common.log_output(message, True)
        return
    elif r.status_code != 200:
        common.log_output("Error getting log", True, r.status_code)
        return

    result = r.json()[-page_size:]
    logs = []
    for log in result:
        log["When"] = helper.format_time(data, log.get("When", ""))
        logs.append(log)

    if len(logs) == 0:
        common.log_output("No log entries found", True)
        return

    helper.output_dump(logs, output_type)
Example #7
    def __init_time_bar(self, duration, interval):
        pos = avg.Point2D(58, 0)
        size = avg.Point2D(self.width - pos.x - 10, 60)
        self.__time_bar = avg.DivNode(pos=pos, size=size, parent=self)

        avg.WordsNode(pos=(0, 0),
                      color=global_values.COLOR_FOREGROUND,
                      fontsize=global_values.FONT_SIZE,
                      text="Time range",
                      parent=self.__time_bar)

        self.__time_slider = custom_slider.IntervalScrollBar(
            pos=(0, 27),
            width=size.x,
            range=(0, duration),
            thumbExtent=duration,
            parent=self.__time_bar)
        self.__time_slider.subscribe(
            custom_slider.IntervalScrollBar.THUMB_POS_CHANGED,
            self.__on_scroll)

        self.__start_label = avg.WordsNode(
            pos=(0, 48),
            color=global_values.COLOR_FOREGROUND,
            text="0:00 ({})".format(helper.format_time(interval[0], False)),
            fontsize=global_values.FONT_SIZE,
            parent=self.__time_bar)
        self.__end_label = avg.WordsNode(
            pos=(size.x, 48),
            color=global_values.COLOR_FOREGROUND,
            text="({}) {}".format(helper.format_time(interval[1], False),
                                  helper.format_time(self.__duration, False)),
            alignment="right",
            fontsize=global_values.FONT_SIZE,
            parent=self.__time_bar)
        self.__cur_time_line = avg.LineNode(color=global_values.COLOR_WHITE,
                                            sensitive=False,
                                            parent=self.__time_bar)
        self.__duration_time_label = avg.WordsNode(
            pos=(size.x, 0),
            color=global_values.COLOR_FOREGROUND,
            alignment="right",
            fontsize=global_values.FONT_SIZE,
            parent=self.__time_bar)
Example #8
def follow_function(data, function, interval=5):
    try:
        while True:
            compatibility.clear_prompt()
            function()
            timestamp = helper.format_time(data, datetime.datetime.now())
            common.log_output(timestamp, True)
            common.log_output("Press control+C to quit", True)
            time.sleep(interval)
    except KeyboardInterrupt:
        return
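One possible way to wire `follow_function` to the `get_live_logs` function from Example #6; the exact argument values are assumptions for illustration, not taken from the source:

import functools

# Hypothetical wiring: re-render the live log view every 10 seconds
# until the user presses Ctrl+C (follow_function catches KeyboardInterrupt).
show_logs = functools.partial(get_live_logs, data, "Information", 20, None, "list")
follow_function(data, show_logs, interval=10)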
Example #9
    def __show_users(self, time):
        set_timestamp = True

        helper.unlink_node_list(self.__user_nodes)
        self.__user_nodes = []

        for i, user in enumerate(self.__users):
            if not self._vis_params.get_user_visible(i):
                continue
            if (self.__mode == "user" and user.headInfoCount == 0) or \
                    (self.__mode == "device" and user.deviceEntryInfoCount == 0):
                continue

            if self.__mode == "user":
                pos = user.getHeadPos(time)
                viewpt = (self._x_axis.value_to_pixel(user.getWallViewpoint(time).x),
                          self._y_axis.value_to_pixel(0))
                if set_timestamp:
                    head_data = user.getHeadData(time)
                    self.__timestamp_words_node.text = "{}\n{}".format(
                        datetime.datetime.fromtimestamp(head_data.time).strftime("%H:%M:%S.%f"),
                        helper.format_time(head_data.time - self.__session_start_time)
                    )
                    set_timestamp = False
            elif self.__mode == "device":
                pos = user.getDeviceEntry(time).spacePos
                viewpt = (self._x_axis.value_to_pixel(user.getDeviceWallViewpoint(time).x),
                          self._y_axis.value_to_pixel(0))
                if set_timestamp:
                    device_entry = user.getDeviceEntry(time)
                    self.__timestamp_words_node.text = "{}\n{}".format(
                        datetime.datetime.fromtimestamp(device_entry.time).strftime("%H:%M:%S.%f"),
                        helper.format_time(device_entry.time - self.__session_start_time)
                    )
                    set_timestamp = False
            else:
                return

            pixel_pos = avg.Point2D(self._x_axis.value_to_pixel(pos[0]), self._y_axis.value_to_pixel(pos[2]))
            node = UserNode(user.getUserID(), pos=pixel_pos, viewpt=viewpt, parent=self._data_div)
            self.__user_nodes.append(node)
Example #10
    def __update_time(self, vis_params):
        cur_time = vis_params.highlight_time
        line_x = (cur_time / self.__duration) * self.__time_slider.width
        self.__cur_time_line.pos1 = (line_x, 23)
        self.__cur_time_line.pos2 = (line_x, 50)

        # Check if something has changed.
        if self.__time_interval == vis_params.get_time_interval():
            return

        # Set the new interval.
        self.__time_interval = vis_params.get_time_interval()[:]

        self.__time_slider.setThumbExtent(self.__time_interval[1] -
                                          self.__time_interval[0])
        self.__time_slider.setThumbPos(self.__time_interval[0])

        self.__start_label.text = "0:00 ({})".format(
            helper.format_time(self.__time_interval[0], False))
        self.__end_label.text = "({}) {}".format(
            helper.format_time(self.__time_interval[1], False),
            helper.format_time(self.__duration, False))
        self.__duration_time_label.text = "Interval duration: " + helper.format_time(
            self.__time_interval[1] - self.__time_interval[0], False)
Example #11
    async def info(self, context):
        response = await context.send(
            embed=create_embed({
                'title': 'Loading bot info...',
                'color': discord.Color.gold()
            }))

        try:
            uptime = round(time.time() - self.uptime)
            uptime_text = format_time(uptime)
            ping = round(self.client.latency * 1000)
            invite_url = discord.utils.oauth_url(
                client_id=CLIENT_ID, permissions=discord.Permissions(8))

            connected_servers = 0
            members_watching = 0
            user_ids = []

            for guild in self.client.guilds:
                connected_servers += 1
                for member in guild.members:
                    members_watching += 1
                    if member.id not in user_ids:
                        user_ids.append(member.id)

            users_watching = len(user_ids)
            await response.edit(embed=create_embed(
                {
                    'title': 'Invite',
                    'url': invite_url,
                    'inline': True,
                }, {
                    'Ping': f'{ping} ms',
                    'Uptime': uptime_text,
                    'Connected Servers': connected_servers,
                    'Users Watching': members_watching,
                    'Unique Users Watching': users_watching
                }))
        except Exception as error_message:
            await response.edit(embed=create_embed(
                {
                    'title': 'Could not load bot info',
                    'color': discord.Color.red()
                }, {'Error Message': error_message}))

            print('Cannot load bot info')
            print(error_message)
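As a side note on the member-counting loop above, `user_ids` is a list, so each membership check is linear in the number of users seen so far; a set gives the same counts with constant-time membership. A small equivalent sketch of that loop:

# Sketch of the counting logic inside the command, using a set instead of a list.
connected_servers = 0
members_watching = 0
user_ids = set()

for guild in self.client.guilds:
    connected_servers += 1
    for member in guild.members:
        members_watching += 1
        user_ids.add(member.id)  # the set de-duplicates automatically

users_watching = len(user_ids)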
Example #12
    def __format_label(self, value):
        if self.__unit == "m":  # meters
            # cut zeros if value is integer
            if value % 1 in (0, 0.0):
                value = int(value)
            else:
                value = round(value, 4)

            return "{} m".format(value)
        elif self.__unit == "s":  # seconds
            return helper.format_time(value)
        elif self.__unit == "px":  # pixels
            return ""
        elif self.__unit == "user":
            return ""
        elif self.__unit == "own":
            return self.__tick_labels[value]

        assert False
Example #13
    def __init__(self):
        cwd = os.getcwd()
        self.call_list = []
        self.msg_list = []
        for file in os.listdir(cwd + '/excel'):

            def open_excel(ending, dict_name):
                if file.endswith(ending):
                    wb = xlrd.open_workbook('excel/' + file)
                    sh = wb.sheet_by_index(0)
                    for rownum in reversed(range(1,
                                                 wb.sheet_by_index(0).nrows)):
                        dict_name.append(sh.row_values(rownum))

            open_excel('通信.xls', self.call_list)
            open_excel('彩信.xls', self.msg_list)
        self.phone_list = set()
        self.msg_phone_list = set()
        phone_location = {}
        for call in self.call_list:
            # convert to seconds
            call[3] = format_time(call[3])
            self.phone_list.add(call[5])
            phone_location[call[5]] = call[7]
        for msg in self.msg_list:
            self.msg_phone_list.add(msg[3])

        self.month_list = []
        first = datetime.date.today().replace(day=1)
        i = 6

        while i > 0:
            self.month_list.append(first)
            first -= dateutil.relativedelta.relativedelta(months=1)
            i -= 1

        # msg location
        for msg in self.msg_list:
            # same number
            if msg[3] in phone_location:
                msg.append(phone_location[msg[3]])
            else:
                msg.append('')
Example #14
def notification_filter(data, json_input):
    notification_list = []
    for key in json_input:
        title = key.get("Title", "Notification")
        notification = {
            title: {
                "Backup ID": key.get("BackupID", ""),
                "Notification ID": key.get("ID", ""),
                "Message": key.get("Message", ""),
                "Type": key.get("Type", ""),
            }
        }
        timestamp = helper.format_time(data, key.get("Timestamp", ""))
        if timestamp is not None:
            notification[title]["Timestamp"] = timestamp

        notification_list.append(notification)

    return notification_list
Example #15
def worker(gpu, ngpus_per_node, args):

    env_device, train_device = args_initialize(gpu, ngpus_per_node, args)

    train_env, test_env, observation = env_initialize(args, env_device)

    train_csv_file, train_csv_writer, eval_csv_file, eval_csv_writer, summary_writer = log_initialize(
        args, train_device)

    model = ActorCritic(args.num_stack,
                        train_env.action_space,
                        BasicBlock,
                        normalize=args.normalize,
                        name=args.env_name)
    model, optimizer = model_initialize(args, model, train_device)

    if (args.num_ales % args.num_minibatches) != 0:
        raise ValueError(
            'Number of ales ({}) is not evenly divisible by the number of minibatches ({})'
            .format(args.num_ales, args.num_minibatches))

    if args.num_steps_per_update == -1:
        args.num_steps_per_update = args.num_steps

    minibatch_size = int(args.num_ales / args.num_minibatches)
    print("minibatch_size", minibatch_size)
    step0 = args.num_steps - args.num_steps_per_update
    n_minibatch = -1

    # This is the number of frames GENERATED between two updates
    num_frames_per_iter = args.num_ales * args.num_steps_per_update
    total_steps = math.ceil(
        args.t_max /
        (args.world_size * num_frames_per_iter))  # total number of update iterations

    shape = (args.num_steps + 1, args.num_ales, args.num_stack,
             *train_env.observation_space.shape[-2:])
    states = torch.zeros(shape, device=train_device, dtype=torch.float32)
    states[step0, :, -1] = observation.to(device=train_device,
                                          dtype=torch.float32)

    shape = (args.num_steps + 1, args.num_ales)
    values = torch.zeros(shape, device=train_device, dtype=torch.float32)
    logits = torch.zeros(
        (args.num_steps + 1, args.num_ales, train_env.action_space.n),
        device=train_device,
        dtype=torch.float32)
    returns = torch.zeros(shape, device=train_device, dtype=torch.float32)

    shape = (args.num_steps, args.num_ales)
    rewards = torch.zeros(shape, device=train_device, dtype=torch.float32)
    masks = torch.zeros(shape, device=train_device, dtype=torch.float32)
    actions = torch.zeros(shape, device=train_device, dtype=torch.long)
    actions_one_hot = torch.zeros((args.num_steps, args.num_ales, 18),
                                  device=train_device,
                                  dtype=torch.long)
    actions_space = torch.zeros(18, device=train_device, dtype=torch.long)

    #for LSTM
    lstm_hidden_state = torch.zeros((args.num_steps + 1, args.num_ales, 256),
                                    device=train_device,
                                    dtype=torch.float32)

    mus = torch.ones(shape, device=train_device, dtype=torch.float32)
    # pis = torch.zeros(shape, device=train_device, dtype=torch.float32)
    rhos = torch.zeros((args.num_steps, minibatch_size),
                       device=train_device,
                       dtype=torch.float32)

    # These variables are used to compute average rewards for all processes.
    episode_rewards = torch.zeros(args.num_ales,
                                  device=train_device,
                                  dtype=torch.float32)
    final_rewards = torch.zeros(args.num_ales,
                                device=train_device,
                                dtype=torch.float32)
    episode_lengths = torch.zeros(args.num_ales,
                                  device=train_device,
                                  dtype=torch.float32)
    final_lengths = torch.zeros(args.num_ales,
                                device=train_device,
                                dtype=torch.float32)

    #init replay memory
    #mem = ReplayMemory(observation.to(device=train_device, dtype=torch.float32),args,train_device)

    torch.cuda.synchronize()

    iterator = range(total_steps)
    if args.rank == 0:
        iterator = tqdm(iterator)
        total_time = 0
        evaluation_offset = 0
    opt_step = 0
    aux_task = False
    for update in iterator:

        T = args.world_size * update * num_frames_per_iter
        if (args.rank == 0) and (T >= evaluation_offset):
            print("===========evaluating=========")
            evaluation_offset += args.evaluation_interval
            torch.save(model.state_dict(), "./model_save")

            eval_lengths, eval_rewards = test(args, model, test_env)

            lmean, lmedian, lmin, lmax, lstd = gen_data(eval_lengths)
            rmean, rmedian, rmin, rmax, rstd = gen_data(eval_rewards)
            length_data = '(length) min/max/mean/median: {lmin:4.1f}/{lmax:4.1f}/{lmean:4.1f}/{lmedian:4.1f}'.format(
                lmin=lmin, lmax=lmax, lmean=lmean, lmedian=lmedian)
            reward_data = '(reward) min/max/mean/median: {rmin:4.1f}/{rmax:4.1f}/{rmean:4.1f}/{rmedian:4.1f}'.format(
                rmin=rmin, rmax=rmax, rmean=rmean, rmedian=rmedian)
            print('[training time: {}] {}'.format(
                format_time(total_time),
                ' --- '.join([length_data, reward_data])))

            if eval_csv_writer and eval_csv_file:
                eval_csv_writer.writerow([
                    T, total_time, rmean, rmedian, rmin, rmax, rstd, lmean,
                    lmedian, lmin, lmax, lstd
                ])
                eval_csv_file.flush()

            if args.plot:
                summary_writer.add_scalar('eval/rewards_mean',
                                          rmean,
                                          T,
                                          walltime=total_time)
                summary_writer.add_scalar('eval/lengths_mean',
                                          lmean,
                                          T,
                                          walltime=total_time)

        start_time = time.time()

        with torch.no_grad():

            for step in range(args.num_steps_per_update):
                nvtx.range_push('train:step')
                #value, logit = model(states[step0 + step])#,lstm_hidden_state[step0+step])
                value, logit, lstm_hidden_state[step0 + step] = model(
                    states[step0 + step], args, lstm_hidden_state[step0],
                    actions_one_hot[step], rewards[step])
                # store values and logits
                values[step0 + step] = value.squeeze(-1)

                # convert actions to numpy and perform next step
                probs = torch.clamp(F.softmax(logit, dim=1),
                                    min=0.00001,
                                    max=0.99999)
                probs_action = probs.multinomial(1).to(env_device)
                actions_space[probs_action] = 1

                torch.cuda.current_stream().synchronize()
                observation, reward, done, info = train_env.step(probs_action)

                observation = observation.squeeze(-1).unsqueeze(1)

                # move back to training memory
                observation = observation.to(device=train_device)
                reward = reward.to(device=train_device, dtype=torch.float32)
                done = done.to(device=train_device, dtype=torch.bool)
                probs_action = probs_action.to(device=train_device,
                                               dtype=torch.long)

                not_done = 1.0 - done.float()

                lstm_hidden_state[step0 + step] *= not_done[:, None]

                # update rewards and actions
                actions[step0 + step].copy_(probs_action.view(-1))
                actions_one_hot[step0 + step].copy_(actions_space)
                masks[step0 + step].copy_(not_done)
                rewards[step0 + step].copy_(reward.sign())

                #mus[step0 + step] = F.softmax(logit, dim=1).gather(1, actions[step0 + step].view(-1).unsqueeze(-1)).view(-1)
                mus[step0 + step] = torch.clamp(F.softmax(logit, dim=1).gather(
                    1, actions[step0 + step].view(-1).unsqueeze(-1)).view(-1),
                                                min=0.00001,
                                                max=0.99999)

                # update next observations
                states[step0 + step + 1, :, :-1].copy_(states[step0 + step, :,
                                                              1:])
                states[step0 + step + 1] *= not_done.view(
                    -1, *[1] * (observation.dim() - 1))
                states[step0 + step + 1, :,
                       -1].copy_(observation.view(-1,
                                                  *states.size()[-2:]))

                # update episodic reward counters
                episode_rewards += reward
                final_rewards[done] = episode_rewards[done]
                episode_rewards *= not_done

                episode_lengths += not_done
                final_lengths[done] = episode_lengths[done]
                episode_lengths *= not_done
                nvtx.range_pop()

                #APPENDING observation
                #mem.append(Experience(obs=observation.to(device=train_device, dtype=torch.float32),action=probs_action.view(-1).unsqueeze(1),reward=reward.unsqueeze(1)))#,done=done.unsqueeze(1)))

                # mem.append(Experience(obs=observation.to(device=train_device, dtype=torch.float32),reward=reward.unsqueeze(1)))

        # if (opt_step >100 and opt_step %50 ==0):
        #     mem.clearMemory(); #clear half of  memory every 50 steps

        n_minibatch = (n_minibatch + 1) % args.num_minibatches
        min_ale_index = int(n_minibatch * minibatch_size)
        max_ale_index = min_ale_index + minibatch_size

        #to cat with output from FC and last reward
        #actions_one_hot= torch.cat([torch.zeros(args.num_steps,minibatch_size,18).to(device=train_device,dtype=torch.long),actions_one_hot[:,min_ale_index:max_ale_index,:]])

        nvtx.range_push('train:compute_values')
        # not sure about the LSTM input/output
        value, logit ,lstm_hidden_state[:,min_ale_index:max_ale_index] = model(states[:, min_ale_index:max_ale_index, :, :, :].contiguous().view(-1, *states.size()[-3:]),\
                                args,lstm_hidden_state[:,min_ale_index:max_ale_index].contiguous(),
                                actions_one_hot[:,min_ale_index:max_ale_index,:].contiguous().view(-1,18),\
                                rewards[:,min_ale_index:max_ale_index].contiguous().view(-1,1))
        batch_value = value.detach().view((args.num_steps + 1, minibatch_size))
        batch_probs = F.softmax(logit.detach()[:(args.num_steps *
                                                 minibatch_size), :],
                                dim=1)
        batch_pis = batch_probs.gather(
            1, actions[:, min_ale_index:max_ale_index].contiguous().view(
                -1).unsqueeze(-1)).view((args.num_steps, minibatch_size))
        returns[-1, min_ale_index:max_ale_index] = batch_value[-1]

        with torch.no_grad():
            for step in reversed(range(args.num_steps)):
                c = torch.clamp(batch_pis[step, :] /
                                mus[step, min_ale_index:max_ale_index],
                                max=args.c_hat)
                rhos[step, :] = torch.clamp(
                    batch_pis[step, :] /
                    mus[step, min_ale_index:max_ale_index],
                    max=args.rho_hat)
                delta_value = rhos[step, :] * (
                    rewards[step, min_ale_index:max_ale_index] +
                    (args.gamma * batch_value[step + 1] -
                     batch_value[step]).squeeze())
                returns[step, min_ale_index:max_ale_index] = \
                        batch_value[step, :].squeeze() + delta_value + args.gamma * c * \
                        (returns[step + 1, min_ale_index:max_ale_index] - batch_value[step + 1, :].squeeze())

        value = value[:args.num_steps * minibatch_size, :]
        logit = logit[:args.num_steps * minibatch_size, :]

        log_probs = F.log_softmax(logit, dim=1)
        probs = F.softmax(logit, dim=1)

        action_log_probs = log_probs.gather(
            1, actions[:, min_ale_index:max_ale_index].contiguous().view(
                -1).unsqueeze(-1))
        dist_entropy = -(log_probs * probs).sum(-1).mean()

        advantages = returns[:-1, min_ale_index:max_ale_index].contiguous(
        ).view(-1).unsqueeze(-1) - value

        value_loss = advantages.pow(2).mean()
        policy_loss = -(action_log_probs * rhos.view(-1, 1).detach() * \
                (rewards[:, min_ale_index:max_ale_index].contiguous().view(-1, 1) + args.gamma * \
                returns[1:, min_ale_index:max_ale_index].contiguous().view(-1, 1) - value).detach()).mean()
        nvtx.range_pop()

        nvtx.range_push('train:backprop')

        # auxiliary task from UNREAL     https://arxiv.org/pdf/1611.05397
        #REWARD PREDICTION
        # if (opt_step>100 and opt_step%20 == 0):
        #     aux_task = True
        #     obs = []
        #     for i in range(20):
        #         obs.append(mem.rp())
        #     states_,batch_rp_c= process_rp(obs)
        #     rp_c = model(states_,args,aux_task='rp')+1e-7
        #     print(rp_c)
        #
        #     rp_loss = -torch.sum(batch_rp_c.to(device=train_device) * torch.log(rp_c))/20/3
        #     print("---------------rp_loss---",rp_loss)
        # #     #####################################################
        ###   pixel change loss
        #     # obs_=[]
        #     # #for i in range(32):TODO BATCH LATER
        #     # obs_.append(mem.pc())
        #     #
        #     # states_pc,batch_pc_a,batch_pc_R = process_pc(obs_,model,train_device)
        #     # print(len(states_pc))
        #     # print(states_pc[0].shape)
        #     # print(batch_pc_a[0].shape)
        #     # print(batch_pc_R[0].shape)
        #     # print(torch.cat(states_pc).shape)
        #     # print(stop)
        #  #torch.Size([5, 4, 84, 84])
        #         #torch.Size([18])
        #         #torch.Size([5, 20, 20])
        #     #print(torch.cat(states_pc).shape)
        #     #print(stop)
        #     # states_pc = torch.cat(states_pc).view(-1,4,84,84)
        #     # pc_q, pc_q_max = model(states_pc,aux_task='pc')
        #     # print(pc_q_max.shape)
        #     # pc_a_reshape = batch_pc_a[0].view(-1,train_env.action_space.n,1,1)
        #     # pc_qa_ = torch.mul(pc_q,pc_a_reshape)
        #     # pc_qa = torch.sum (pc_qa_, dim=1,keepdim =False)
        #     #
        #     # print(pc_qa.shape)
        #     # print(batch_pc_R[0].shape)
        #     # print(pc_qa.shape)
        #     # pc_loss =  torch.sum( (( batch_pc_R[0]-pc_qa)**2/2.)                )
        #     # print(pc_loss)
        #     # print(stop)
        #

        loss = value_loss * args.value_loss_coef + policy_loss - dist_entropy * args.entropy_coef
        # if aux_task ==True:
        #     loss += rp_loss
        #     aux_task = False

        optimizer.zero_grad()

        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward(retain_graph=True)
        master_params = amp.master_params(optimizer)

        torch.nn.utils.clip_grad_norm_(master_params, args.max_grad_norm)
        optimizer.step()
        opt_step += 1

        nvtx.range_pop()

        #nvtx.range_push('train:next_states')
        for step in range(0, args.num_steps_per_update):
            states[:-1, :, :, :, :] = states[1:, :, :, :, :]
            rewards[:-1, :] = rewards[1:, :]
            actions[:-1, :] = actions[1:, :]
            # actions_one_hot[:-1,:] = actions_one_hot[1:,:]
            # lstm_hidden_state[:-1,:] = lstm_hidden_state [1:,:]
            masks[:-1, :] = masks[1:, :]
            mus[:-1, :] = mus[1:, :]
        #nvtx.range_pop()

        torch.cuda.synchronize()

        if args.rank == 0:
            iter_time = time.time() - start_time
            total_time += iter_time

            if args.plot:
                summary_writer.add_scalar('train/rewards_mean',
                                          final_rewards.mean().item(),
                                          T,
                                          walltime=total_time)
                summary_writer.add_scalar('train/lengths_mean',
                                          final_lengths.mean().item(),
                                          T,
                                          walltime=total_time)
                summary_writer.add_scalar('train/value_loss',
                                          value_loss,
                                          T,
                                          walltime=total_time)
                summary_writer.add_scalar('train/policy_loss',
                                          policy_loss,
                                          T,
                                          walltime=total_time)
                summary_writer.add_scalar('train/entropy',
                                          dist_entropy,
                                          T,
                                          walltime=total_time)

            progress_data = callback(args, model, T, iter_time, final_rewards,
                                     final_lengths, value_loss, policy_loss,
                                     dist_entropy, train_csv_writer,
                                     train_csv_file)
            iterator.set_postfix_str(progress_data)

    if args.plot and (args.rank == 0):
        # name = '{}.pth'.format("BO_PC")
        # torch.save(model.module.state_dict(), "Pong_1gpu_1200")
        summary_writer.close()
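The backward pass over `reversed(range(args.num_steps))` above computes V-trace-style corrected returns with clipped importance ratios (c_hat, rho_hat). Below is a standalone sketch of that recursion, with names and tensor shapes chosen for illustration rather than taken from the source:

import torch

def vtrace_returns(rewards, values, pis, mus, gamma, c_hat=1.0, rho_hat=1.0):
    # rewards, pis, mus: (num_steps, batch); values: (num_steps + 1, batch).
    # Returns (num_steps + 1, batch) targets, mirroring the loop in Example #15.
    num_steps = rewards.size(0)
    returns = torch.zeros_like(values)
    returns[-1] = values[-1]
    ratios = pis / mus
    for t in reversed(range(num_steps)):
        c = torch.clamp(ratios[t], max=c_hat)
        rho = torch.clamp(ratios[t], max=rho_hat)
        delta = rho * (rewards[t] + gamma * values[t + 1] - values[t])
        returns[t] = values[t] + delta + gamma * c * (returns[t + 1] - values[t + 1])
    return returns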
Example #16
def worker(gpu, ngpus_per_node, args):
    args.gpu = gpu

    if args.distributed:
        args.seed += args.gpu
        torch.cuda.set_device(args.gpu)

        args.rank = int(os.environ['RANK']) if 'RANK' in os.environ else 0
        if args.multiprocessing_distributed:
            args.rank = args.rank * ngpus_per_node + args.gpu

        torch.distributed.init_process_group(
            backend='nccl',
            init_method='tcp://127.0.0.1:8632',
            world_size=args.world_size,
            rank=args.rank)
    else:
        args.rank = 0

    args.use_cuda_env = args.use_cuda_env and torch.cuda.is_available()
    args.no_cuda_train = not torch.cuda.is_available()
    args.verbose = args.verbose and (args.rank == 0)

    env_device = torch.device(
        'cuda', args.gpu) if args.use_cuda_env else torch.device('cpu')
    train_device = torch.device(
        'cuda', args.gpu) if not args.no_cuda_train else torch.device('cpu')

    # Setup
    np.random.seed(args.seed)
    torch.manual_seed(np.random.randint(1, 10000))
    if args.use_cuda_env or not args.no_cuda_train:
        torch.cuda.manual_seed(random.randint(1, 10000))

    if train_device.type == 'cuda':
        print('Train:\n' + cuda_device_str(train_device.index), flush=True)

    if args.use_openai:
        test_env = create_vectorize_atari_env(args.env_name,
                                              args.seed,
                                              args.evaluation_episodes,
                                              episode_life=False,
                                              clip_rewards=False)
        test_env.reset()
    else:
        test_env = AtariEnv(args.env_name,
                            args.evaluation_episodes,
                            color_mode='gray',
                            device='cpu',
                            rescale=True,
                            clip_rewards=False,
                            episodic_life=False,
                            repeat_prob=0.0,
                            frameskip=4)

    # Agent
    dqn = Agent(args, test_env.action_space)

    # Construct validation memory
    if args.rank == 0:
        print('Initializing evaluation memory with {} entries...'.format(
            args.evaluation_size),
              end='',
              flush=True)
        start_time = time.time()

    val_mem = initialize_validation(args, train_device)

    if args.rank == 0:
        print('complete ({})'.format(format_time(time.time() - start_time)),
              flush=True)

    if args.evaluate:
        dqn.eval()
        rewards, lengths, avg_Q = test(args, 0, dqn, val_mem, test_env,
                                       train_device)  # Test
    else:
        if args.rank == 0:
            print('Entering main training loop', flush=True)

            if args.output_filename:
                csv_file = open(args.output_filename, 'w', newline='')
                csv_file.write(json.dumps(vars(args)))
                csv_file.write('\n')
                csv_writer = csv.writer(csv_file, delimiter=',')
                csv_writer.writerow([
                    'frames', 'total_time', 'rmean', 'rmedian', 'rstd', 'rmin',
                    'rmax', 'lmean', 'lmedian', 'lstd', 'lmin', 'lmax'
                ])
            else:
                csv_writer, csv_file = None, None

            if args.plot:
                from tensorboardX import SummaryWriter
                current_time = datetime.now().strftime('%b%d_%H-%M-%S')
                log_dir = os.path.join(
                    args.log_dir, current_time + '_' + socket.gethostname())
                writer = SummaryWriter(log_dir=log_dir)
                for k, v in vars(args).items():
                    writer.add_text(k, str(v))

            # Environment
            print('Initializing environments...', end='', flush=True)
            start_time = time.time()

        if args.use_openai:
            train_env = create_vectorize_atari_env(
                args.env_name,
                args.seed,
                args.num_ales,
                episode_life=True,
                clip_rewards=args.reward_clip,
                max_frames=args.max_episode_length)
            observation = torch.from_numpy(train_env.reset()).squeeze(1)
        else:
            train_env = AtariEnv(args.env_name,
                                 args.num_ales,
                                 color_mode='gray',
                                 device=env_device,
                                 rescale=True,
                                 clip_rewards=args.reward_clip,
                                 episodic_life=True,
                                 repeat_prob=0.0)
            train_env.train()
            observation = train_env.reset(
                initial_steps=args.ale_start_steps,
                verbose=args.verbose).clone().squeeze(-1)

        if args.rank == 0:
            print('complete ({})'.format(format_time(time.time() -
                                                     start_time)),
                  flush=True)

        # These variables are used to compute average rewards for all processes.
        episode_rewards = torch.zeros(args.num_ales,
                                      device=train_device,
                                      dtype=torch.float32)
        episode_lengths = torch.zeros(args.num_ales,
                                      device=train_device,
                                      dtype=torch.float32)
        final_rewards = torch.zeros(args.num_ales,
                                    device=train_device,
                                    dtype=torch.float32)
        final_lengths = torch.zeros(args.num_ales,
                                    device=train_device,
                                    dtype=torch.float32)
        has_completed = torch.zeros(args.num_ales,
                                    device=train_device,
                                    dtype=torch.uint8)

        mem = ReplayMemory(args, args.memory_capacity, train_device)
        mem.reset(observation)
        priority_weight_increase = (1 - args.priority_weight) / (
            args.t_max - args.learn_start)

        state = torch.zeros((args.num_ales, args.history_length, 84, 84),
                            device=mem.device,
                            dtype=torch.float32)
        state[:, -1] = observation.to(device=mem.device,
                                      dtype=torch.float32).div(255.0)

        num_frames_per_iter = args.num_ales
        total_steps = math.ceil(args.t_max /
                                (args.world_size * num_frames_per_iter))
        epsilons = np.linspace(
            args.epsilon_start, args.epsilon_final,
            math.ceil(args.epsilon_frames / num_frames_per_iter))
        epsilon_offset = math.ceil(args.learn_start / num_frames_per_iter)

        prefetcher = data_prefetcher(args.batch_size, train_device, mem)

        avg_loss = 'N/A'
        eval_offset = 0
        target_update_offset = 0

        total_time = 0
        env_time = 0
        mem_time = 0
        net_time = 0

        fps_steps = 0
        fps_start_time = time.time()

        # main loop
        iterator = range(total_steps)
        if args.rank == 0:
            iterator = tqdm(iterator)

        env_stream = torch.cuda.Stream()
        train_stream = torch.cuda.Stream()

        for update in iterator:

            T = args.world_size * update * num_frames_per_iter
            epsilon = epsilons[min(
                update - epsilon_offset,
                len(epsilons) - 1)] if T >= args.learn_start else epsilons[0]
            start_time = time.time()

            if update % args.replay_frequency == 0:
                dqn.reset_noise()  # Draw a new set of noisy weights

            dqn.eval()
            nvtx.range_push('train:select action')
            if args.noisy_linear:
                action = dqn.act(
                    state)  # Choose an action greedily (with noisy weights)
            else:
                action = dqn.act_e_greedy(state, epsilon=epsilon)
            nvtx.range_pop()
            dqn.train()

            fps_steps += 1

            if args.use_openai:
                action = action.cpu().numpy()

            torch.cuda.synchronize()

            with torch.cuda.stream(env_stream):
                nvtx.range_push('train:env step')
                if args.use_openai:
                    observation, reward, done, info = train_env.step(
                        action)  # Step
                    # convert back to pytorch tensors
                    observation = torch.from_numpy(observation).squeeze(1)
                    reward = torch.from_numpy(reward.astype(np.float32))
                    done = torch.from_numpy(done.astype(np.uint8))
                    action = torch.from_numpy(action)
                else:
                    observation, reward, done, info = train_env.step(
                        action, asyn=True)  # Step
                    observation = observation.clone().squeeze(-1)
                nvtx.range_pop()

                observation = observation.to(device=train_device)
                reward = reward.to(device=train_device)
                done = done.to(device=train_device)
                action = action.to(device=train_device)

                delta = time.time() - start_time
                env_time += delta
                total_time += delta

                observation = observation.float().div_(255.0)

                state[:, :-1].copy_(state[:, 1:].clone())
                state *= (1 - done).view(-1, 1, 1, 1).float()
                state[:, -1].copy_(observation)

                # update episodic reward counters
                not_done = (1 - done).float()
                has_completed |= (done == 1)

                episode_rewards += reward.float()
                final_rewards[done] = episode_rewards[done]
                episode_rewards *= not_done

                episode_lengths += not_done
                final_lengths[done] = episode_lengths[done]
                episode_lengths *= not_done

            # Train and test
            if T >= args.learn_start:
                mem.priority_weight = min(
                    mem.priority_weight + priority_weight_increase,
                    1)  # Anneal importance sampling weight β to 1
                prefetcher.preload()

                avg_loss = 0.0
                num_minibatches = min(
                    int(args.num_ales / args.replay_frequency), 8)
                for _ in range(num_minibatches):
                    # Sample transitions
                    start_time = time.time()
                    nvtx.range_push('train:sample states')
                    idxs, states, actions, returns, next_states, nonterminals, weights = prefetcher.next(
                    )
                    nvtx.range_pop()
                    delta = time.time() - start_time
                    mem_time += delta
                    total_time += delta

                    start_time = time.time()
                    nvtx.range_push('train:network update')
                    loss = dqn.learn(states, actions, returns, next_states,
                                     nonterminals, weights)
                    nvtx.range_pop()
                    delta = time.time() - start_time
                    net_time += delta
                    total_time += delta

                    start_time = time.time()
                    nvtx.range_push('train:update priorities')
                    mem.update_priorities(
                        idxs, loss)  # Update priorities of sampled transitions
                    nvtx.range_pop()
                    delta = time.time() - start_time
                    mem_time += delta
                    total_time += delta

                    avg_loss += loss.mean().item()
                avg_loss /= num_minibatches

                # Update target network
                if T >= target_update_offset:
                    dqn.update_target_net()
                    target_update_offset += args.target_update

            torch.cuda.current_stream().wait_stream(env_stream)
            torch.cuda.current_stream().wait_stream(train_stream)

            start_time = time.time()
            nvtx.range_push('train:append memory')
            mem.append(observation, action, reward,
                       done)  # Append transition to memory
            nvtx.range_pop()
            delta = time.time() - start_time
            mem_time += delta
            total_time += delta

            fps_end_time = time.time()
            fps = (args.world_size * fps_steps *
                   args.num_ales) / (fps_end_time - fps_start_time)
            fps_start_time = fps_end_time
            fps_steps = 0

            if args.rank == 0:
                if args.plot and ((update % args.replay_frequency) == 0):
                    writer.add_scalar('train/epsilon', epsilon, T)
                    writer.add_scalar('train/rewards', final_rewards.mean(), T)
                    writer.add_scalar('train/lengths', final_lengths.mean(), T)

                if T >= eval_offset:
                    eval_start_time = time.time()
                    dqn.eval()  # Set DQN (online network) to evaluation mode
                    rewards, lengths, avg_Q = test(args, T, dqn, val_mem,
                                                   test_env, train_device)
                    dqn.train()  # Set DQN (online network) back to training mode
                    eval_total_time = time.time() - eval_start_time
                    eval_offset += args.evaluation_interval

                    rmean, rmedian, rstd, rmin, rmax = vec_stats(rewards)
                    lmean, lmedian, lstd, lmin, lmax = vec_stats(lengths)

                    print('reward: {:4.2f}, {:4.0f}, {:4.0f}, {:4.4f} | '
                          'length: {:4.2f}, {:4.0f}, {:4.0f}, {:4.4f} | '
                          'Avg. Q: {:4.4f} | {} | Overall FPS: {:4.2f}'.format(
                              rmean, rmin, rmax, rstd, lmean, lmin, lmax, lstd,
                              avg_Q, format_time(eval_total_time), fps),
                          flush=True)

                    if args.output_filename and csv_writer and csv_file:
                        csv_writer.writerow([
                            T, total_time, rmean, rmedian, rstd, rmin, rmax,
                            lmean, lmedian, lstd, lmin, lmax
                        ])
                        csv_file.flush()

                    if args.plot:
                        writer.add_scalar('eval/rewards', rmean, T)
                        writer.add_scalar('eval/lengths', lmean, T)
                        writer.add_scalar('eval/avg_Q', avg_Q, T)

                loss_str = '{:4.4f}'.format(avg_loss) if isinstance(
                    avg_loss, float) else avg_loss
                progress_data = 'T = {:,} epsilon = {:4.2f} avg reward = {:4.2f} loss: {} ({:4.2f}% net, {:4.2f}% mem, {:4.2f}% env)' \
                                .format(T, epsilon, final_rewards.mean().item(), loss_str, \
                                        *percent_time(total_time, net_time, mem_time, env_time))
                iterator.set_postfix_str(progress_data)

    if args.plot and (args.rank == 0):
        writer.close()

    if args.use_openai:
        train_env.close()
        test_env.close()
Example #17
def backup_filter(data, json_input):
    backup_list = []
    for key in json_input:
        backup = key.pop("Backup", {})
        metadata = backup.pop("Metadata", {})
        backup_name = backup.pop("Name", "")
        backup = {
            "ID": backup.get("ID", ""),
            "Local database": backup.get("DBPath", ""),
        }
        backup["Versions"] = int(metadata.get("BackupListCount", 0))
        backup["Last run"] = {
            "Duration":
            helper.format_duration(metadata.get("LastBackupDuration", "0")),
            "Started":
            helper.format_time(data, metadata.get("LastBackupStarted", "0")),
            "Stopped":
            helper.format_time(data, metadata.get("LastBackupFinished", "0")),
        }
        backup["Size"] = {
            "Local": metadata.get("SourceSizeString", ""),
            "Backend": metadata.get("TargetSizeString", "")
        }

        schedule = key.get("Schedule", None)
        if schedule is not None:
            next_run = helper.format_time(data, schedule.pop("Time", ""))
            if next_run is not None:
                schedule["Next run"] = next_run
            last_run = helper.format_time(data, schedule.pop("LastRun", ""))
            if last_run is not None:
                schedule["Last run"] = last_run
            schedule.pop("AllowedDays", None)
            schedule.pop("ID", None)
            schedule.pop("Rule", None)
            schedule.pop("Tags", None)
            backup["Schedule"] = schedule

        progress_state = key.get("Progress", None)
        if progress_state is not None:
            state = progress_state.get("Phase", None)
            speed = progress_state.get("BackendSpeed", 0)
            progress = {
                "State": state,
                "Counting files": progress_state.get("StillCounting", False),
                "Backend": {
                    "Action": progress_state.get("BackendAction", 0)
                },
                "Task ID": progress_state.get("TaskID", -1),
            }
            if speed > 0:
                readable_speed = helper.format_bytes(speed) + "/s"
                progress["Backend"]["Speed"] = readable_speed

            # Display item only if relevant
            if not progress_state.get("StillCounting", False):
                progress.pop("Counting files")
            # Avoid 0 division
            file_count = progress_state.get("ProcessedFileCount", 0)
            total_file_count = progress_state.get("TotalFileCount", 0)
            processing = state == "Backup_ProcessingFiles"
            if file_count > 0 and total_file_count > 0 and processing:
                processed = "{0:.2f}".format(file_count / total_file_count *
                                             100)
                progress["Processed files"] = processed + "%"
            # Avoid 0 division
            data_size = progress_state.get("ProcessedFileSize", 0)
            total_data_size = progress_state.get("TotalFileSize", 0)
            processing = state == "Backup_ProcessingFiles"
            if data_size > 0 and total_data_size > 0 and processing:
                # Calculate percentage
                processed = "{0:.2f}".format(data_size / total_data_size * 100)
                # Format text "x% (y GB of z GB)"
                processed += "% (" + str(helper.format_bytes(data_size))
                processed += " of "
                processed += str(helper.format_bytes(total_data_size)) + ")"
                progress["Processed data"] = processed
            # Avoid 0 division
            current = progress_state.get("BackendFileProgress", 0)
            total = progress_state.get("BackendFileSize", 0)
            if current > 0 and total > 0:
                backend_progress = "{0:.2f}".format(current / total * 100)
                progress["Backend"]["Progress"] = backend_progress + "%"
            backup["Progress"] = progress

        key = {backup_name: backup}
        backup_list.append(key)

    return backup_list
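A rough, hypothetical usage sketch: the payload below only mirrors the keys the loop above reads, the timestamp and duration strings are placeholders in whatever format helper.format_time and helper.format_duration expect, and an empty dict stands in for the client's data/config argument.

sample_response = [{
    "Backup": {
        "ID": "1",
        "Name": "Documents",
        "DBPath": "/data/DOC.sqlite",
        "Metadata": {
            "BackupListCount": "3",
            "LastBackupDuration": "00:05:12",
            "LastBackupStarted": "20240101T120000Z",
            "LastBackupFinished": "20240101T120512Z",
            "SourceSizeString": "1.2 GB",
            "TargetSizeString": "900 MB",
        },
    },
}]

filtered = backup_filter(data={}, json_input=sample_response)
print(filtered[0]["Documents"]["Versions"])  # -> 3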
Ejemplo n.º 18
0
def list_filter(data, json_input, resource):
    resource_list = []
    if resource == "backups":
        for key in json_input:
            backup = key.get("Backup", None)
            schedule = key.get("Schedule", None)
            progress_state = key.get("Progress", None)
            backup_name = backup.get("Name", "")
            # Grab the metadata before "backup" is rebound to the display dict
            # below; otherwise the source-size lookup can never succeed.
            metadata = backup.get("Metadata", {})
            backup = {
                backup_name: {
                    "ID": backup.get("ID", ""),
                }
            }

            if metadata.get('SourceSizeString') is not None:
                size = metadata.get('SourceSizeString')
                backup[backup_name]["Source size"] = size

            if schedule is not None:
                next_run = helper.format_time(data, schedule.get("Time", ""))
                if next_run is not None:
                    backup[backup_name]["Next run"] = next_run

                last_run = helper.format_time(data,
                                              schedule.get("LastRun", ""))
                if last_run is not None:
                    backup[backup_name]["Last run"] = last_run

            if progress_state is not None:
                backup[backup_name]["Running"] = {
                    "Task ID": progress_state.get("TaskID", None),
                    "State": progress_state.get("Phase", None),
                }

            resource_list.append(backup)

    elif resource == "notifications":
        for val in json_input:
            title = val.get("Title", "")
            notification = {
                title: {
                    "Backup ID": val.get("BackupID", ""),
                    "Notification ID": val.get("ID", ""),
                }
            }
            timestamp = helper.format_time(data, val.get("Timestamp", ""))
            if timestamp is not None:
                # Nest under the title key, consistent with the backups branch
                notification[title]["Timestamp"] = timestamp

            resource_list.append(notification)

    elif resource == "serversettings":
        # Internal settings that should not be shown in the output
        hidden_values = [
            "update-check-latest",
            "last-update-check",
            "is-first-run",
            "update-check-interval",
            "server-passphrase",
            "server-passphrase-salt",
            "server-passphrase-trayicon",
            "server-passphrase-trayicon-hash",
            "unacked-error",
            "unacked-warning",
            "has-fixed-invalid-backup-id",
        ]
        for key, value in json_input.items():
            if key in hidden_values:
                continue
            setting = {key: {"value": value}}

            resource_list.append(setting)
    else:
        resource_list = json_input

    return resource_list
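For the serversettings branch, which never touches the data argument, a quick illustrative call with hypothetical values would look like this; hidden keys such as server-passphrase are dropped and each remaining setting becomes its own one-key dict.

settings = {
    "server-port": 8200,
    "server-passphrase": "secret",
    "allowed-hostnames": "*",
}
print(list_filter(data={}, json_input=settings, resource="serversettings"))
# -> [{'server-port': {'value': 8200}}, {'allowed-hostnames': {'value': '*'}}]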
Ejemplo n.º 19
0
torch.cuda.manual_seed_all(seed_val)

# Store the average loss after each epoch so we can plot them.
loss_values = []
model.zero_grad()

for epoch_i in range(0, epochs):
    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')
    t0 = time.time()
    total_loss = 0
    model.train()
    for step, batch in enumerate(train_dataloader):
        if step % 40 == 0 and step != 0:  # logging
            elapsed = format_time(time.time() - t0)
            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(
                step, len(train_dataloader), elapsed))
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)
        b_categories = batch[3].to(device)

        outputs = model(b_input_ids,
                        token_type_ids=None,
                        attention_mask=b_input_mask,
                        labels=b_labels)

        loss = outputs[0]
        total_loss += loss.item()
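The fragment is cut off before the backward pass. A typical continuation for this kind of fine-tuning loop is sketched below; it is assumed rather than taken from the source: optimizer and scheduler are presumed to exist, and the clipping threshold of 1.0 is a common default, not a value from the original code.

        # Assumed continuation of the inner loop:
        loss.backward()                                           # backpropagate
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)   # limit gradient norm
        optimizer.step()                                          # update weights
        scheduler.step()                                          # advance the LR schedule
        model.zero_grad()                                         # clear gradients for the next batch

    # Assumed epoch-level bookkeeping, feeding the loss_values list defined above.
    avg_train_loss = total_loss / len(train_dataloader)
    loss_values.append(avg_train_loss)
    print("  Average training loss: {0:.2f}".format(avg_train_loss))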
Ejemplo n.º 20
0
def worker(gpu, ngpus_per_node, args):
    env_device, train_device = args_initialize(gpu, ngpus_per_node, args)
    train_env, test_env, observation = env_initialize(args, env_device)
    train_csv_file, train_csv_writer, eval_csv_file, eval_csv_writer, summary_writer = log_initialize(
        args, train_device)

    model = ActorCritic(args.num_stack,
                        train_env.action_space,
                        normalize=args.normalize,
                        name=args.env_name)
    model, optimizer = model_initialize(args, model, train_device)

    num_frames_per_iter = args.num_ales * args.num_steps
    total_steps = math.ceil(args.t_max /
                            (args.world_size * num_frames_per_iter))

    shape = (args.num_steps + 1, args.num_ales, args.num_stack,
             *train_env.observation_space.shape[-2:])
    states = torch.zeros(shape, device=train_device, dtype=torch.float32)
    states[0, :, -1] = observation.to(device=train_device, dtype=torch.float32)

    shape = (args.num_steps + 1, args.num_ales)
    values = torch.zeros(shape, device=train_device, dtype=torch.float32)
    returns = torch.zeros(shape, device=train_device, dtype=torch.float32)

    shape = (args.num_steps, args.num_ales)
    rewards = torch.zeros(shape, device=train_device, dtype=torch.float32)
    masks = torch.zeros(shape, device=train_device, dtype=torch.float32)
    actions = torch.zeros(shape, device=train_device, dtype=torch.long)

    # These variables are used to compute average rewards for all processes.
    episode_rewards = torch.zeros(args.num_ales,
                                  device=train_device,
                                  dtype=torch.float32)
    final_rewards = torch.zeros(args.num_ales,
                                device=train_device,
                                dtype=torch.float32)
    episode_lengths = torch.zeros(args.num_ales,
                                  device=train_device,
                                  dtype=torch.float32)
    final_lengths = torch.zeros(args.num_ales,
                                device=train_device,
                                dtype=torch.float32)

    if args.use_gae:
        gae = torch.zeros(args.num_ales,
                          device=train_device,
                          dtype=torch.float32)

    def maybe_npy(a):
        # step() expects numpy arrays when using the OpenAI environments;
        # otherwise actions are passed through as tensors.
        return a.numpy() if args.use_openai else a

    torch.cuda.synchronize()

    iterator = range(total_steps)
    if args.rank == 0:
        iterator = tqdm(iterator)
        total_time = 0
        evaluation_offset = 0

    for update in iterator:

        T = args.world_size * update * num_frames_per_iter
        if (args.rank == 0) and (T >= evaluation_offset):
            evaluation_offset += args.evaluation_interval
            eval_lengths, eval_rewards = test(args, model, test_env)

            lmean, lmedian, lmin, lmax, lstd = gen_data(eval_lengths)
            rmean, rmedian, rmin, rmax, rstd = gen_data(eval_rewards)
            length_data = '(length) min/max/mean/median: {lmin:4.1f}/{lmax:4.1f}/{lmean:4.1f}/{lmedian:4.1f}'.format(
                lmin=lmin, lmax=lmax, lmean=lmean, lmedian=lmedian)
            reward_data = '(reward) min/max/mean/median: {rmin:4.1f}/{rmax:4.1f}/{rmean:4.1f}/{rmedian:4.1f}'.format(
                rmin=rmin, rmax=rmax, rmean=rmean, rmedian=rmedian)
            print('[training time: {}] {}'.format(
                format_time(total_time),
                ' --- '.join([length_data, reward_data])))

            if eval_csv_writer and eval_csv_file:
                eval_csv_writer.writerow([
                    T, total_time, rmean, rmedian, rmin, rmax, rstd, lmean,
                    lmedian, lmin, lmax, lstd
                ])
                eval_csv_file.flush()

            if args.plot:
                summary_writer.add_scalar('eval/rewards_mean',
                                          rmean,
                                          T,
                                          walltime=total_time)
                summary_writer.add_scalar('eval/lengths_mean',
                                          lmean,
                                          T,
                                          walltime=total_time)

        start_time = time.time()

        with torch.no_grad():

            for step in range(args.num_steps):
                value, logit = model(states[step])

                # store values
                values[step] = value.squeeze(-1)

                # convert actions to numpy and perform next step
                probs_action = F.softmax(logit,
                                         dim=1).multinomial(1).to(env_device)
                observation, reward, done, info = train_env.step(
                    maybe_npy(probs_action))

                if args.use_openai:
                    # convert back to pytorch tensors
                    observation = torch.from_numpy(observation)
                    reward = torch.from_numpy(reward)
                    done = torch.from_numpy(done.astype(np.uint8))
                else:
                    observation = observation.squeeze(-1).unsqueeze(1)

                # move back to training memory
                observation = observation.to(device=train_device)
                reward = reward.to(device=train_device, dtype=torch.float32)
                done = done.to(device=train_device, dtype=torch.bool)
                probs_action = probs_action.to(device=train_device,
                                               dtype=torch.long)

                not_done = 1.0 - done.float()

                # update rewards and actions
                actions[step].copy_(probs_action.view(-1))
                masks[step].copy_(not_done)
                rewards[step].copy_(reward.sign())

                # update next observations
                states[step + 1, :, :-1].copy_(states[step, :, 1:].clone())
                states[step + 1] *= not_done.view(
                    -1, *[1] * (observation.dim() - 1))
                states[step + 1, :,
                       -1].copy_(observation.view(-1,
                                                  *states.size()[-2:]))

                # update episodic reward counters
                episode_rewards += reward
                final_rewards[done] = episode_rewards[done]
                episode_rewards *= not_done

                episode_lengths += not_done
                final_lengths[done] = episode_lengths[done]
                episode_lengths *= not_done

            returns[-1] = values[-1] = model(states[-1])[0].data.squeeze(-1)

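            # The two branches below build the return targets: with GAE,
            #   delta_t = r_t + gamma * V(s_{t+1}) * mask_t - V(s_t)
            #   gae_t   = delta_t + gamma * tau * mask_t * gae_{t+1}
            # and returns[step] = gae_t + V(s_t); without GAE, plain discounted
            # n-step returns are bootstrapped from the final value estimate.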
            if args.use_gae:
                gae.zero_()
                for step in reversed(range(args.num_steps)):
                    delta = rewards[step] + (args.gamma * values[step + 1] *
                                             masks[step]) - values[step]
                    gae = delta + (args.gamma * args.tau * masks[step] * gae)
                    returns[step] = gae + values[step]
            else:
                for step in reversed(range(args.num_steps)):
                    returns[step] = rewards[step] + (
                        args.gamma * returns[step + 1] * masks[step])

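        # A2C update: re-evaluate every stored state in a single batch, then
        # combine a value-regression loss, a policy-gradient loss weighted by
        # the detached advantages, and an entropy bonus that keeps the policy
        # from collapsing too early.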
        value, logit = model(states[:-1].view(-1, *states.size()[-3:]))

        log_probs = F.log_softmax(logit, dim=1)
        probs = F.softmax(logit, dim=1)

        action_log_probs = log_probs.gather(1, actions.view(-1).unsqueeze(-1))
        dist_entropy = -(log_probs * probs).sum(-1).mean()

        advantages = returns[:-1].view(-1).unsqueeze(-1) - value

        value_loss = advantages.pow(2).mean()
        policy_loss = -(advantages.clone().detach() * action_log_probs).mean()

        loss = (value_loss * args.value_loss_coef + policy_loss
                - dist_entropy * args.entropy_coef)
        optimizer.zero_grad()

        if args.cpu_train:
            loss.backward()
            master_params = model.parameters()
        else:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
            master_params = amp.master_params(optimizer)

        torch.nn.utils.clip_grad_norm_(master_params, args.max_grad_norm)
        optimizer.step()

        states[0].copy_(states[-1])

        torch.cuda.synchronize()

        if args.rank == 0:
            iter_time = time.time() - start_time
            total_time += iter_time

            if args.plot:
                summary_writer.add_scalar('train/rewards_mean',
                                          final_rewards.mean().item(),
                                          T,
                                          walltime=total_time)
                summary_writer.add_scalar('train/lengths_mean',
                                          final_lengths.mean().item(),
                                          T,
                                          walltime=total_time)
                summary_writer.add_scalar('train/value_loss',
                                          value_loss,
                                          T,
                                          walltime=total_time)
                summary_writer.add_scalar('train/policy_loss',
                                          policy_loss,
                                          T,
                                          walltime=total_time)
                summary_writer.add_scalar('train/entropy',
                                          dist_entropy,
                                          T,
                                          walltime=total_time)

            progress_data = callback(args, model, T, iter_time,
                                     final_rewards, final_lengths,
                                     value_loss.item(), policy_loss.item(),
                                     dist_entropy.item(), train_csv_writer,
                                     train_csv_file)
            iterator.set_postfix_str(progress_data)

    if args.plot and (args.rank == 0):
        summary_writer.close()

    if args.use_openai:
        train_env.close()
    if args.use_openai_test_env:
        test_env.close()
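The multi-GPU entry point that calls worker is not shown. One common launch pattern, sketched under the assumption that args has already been parsed elsewhere, spawns one worker process per visible GPU:

import torch.multiprocessing as mp

ngpus_per_node = torch.cuda.device_count()
# mp.spawn passes the process index as the first argument, which worker
# interprets as the GPU id.
mp.spawn(worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))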
Ejemplo n.º 21
0
def test_format_time(self):
    test_time_1 = helper.format_time([u'12:00', u'AM'])
    test_time_2 = helper.format_time([u'2:00', u'PM'])
    self.assertEqual(test_time_1, '00:00')
    self.assertEqual(test_time_2, '14:00')
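The helper under test is not shown here. A minimal sketch of a format_time that would satisfy these assertions, as an illustration only and not necessarily the project's implementation, converts a [time, meridiem] pair to a 24-hour HH:MM string:

from datetime import datetime

def format_time(parts):
    # parts looks like ['12:00', 'AM']; return the 24-hour "HH:MM" string.
    return datetime.strptime(" ".join(parts), "%I:%M %p").strftime("%H:%M")

assert format_time([u'12:00', u'AM']) == '00:00'
assert format_time([u'2:00', u'PM']) == '14:00'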