Code example #1
File: slavenode.py Project: hsc-lab/octopus
 def do(self, type, job, hashtype):
     """
     Answer to a DO command. Used to send a job.
     """
     if debug:
         print "SlaveProtocol.do(%s, %s, %s)" % (type, str(job), hashtype)
     if self.factory.waiting:
         if debug:
             print "    I was waiting for a job... but not anymore :D"
         self.factory.waiting = False
         reg = regexes[program]
         if debug:
             print "    Using regex:", reg
         vars["JOB"] = makeJob[type][program](job)
         if debug:
             print "    vars[JOB] =", vars["JOB"]
         vars["HASHTYPE"] = attacks.hashtypes[hashtype][program]["id"]
         if debug:
             print "    vars[HASHTYPE] =", vars["HASHTYPE"]
         vars["d"], vars["l"] = charsets["d"], charsets["l"]
         vars["u"], vars["s"] = charsets["u"], charsets["s"]
         vars["a"] = charsets["a"]
         if debug:
             print "    Charsets set in vars"
         if not vars["JOB"]:
             vars["JOB"] = "%s/tmp/chunk" % (home)
             dname, nb, step = job.split(":")
             nb = int(nb)
             step = int(step)
             self.program = program
             self.type = type
             self.vars = vars
             self.reg = reg
             self.deferatt = defer.Deferred()
             self.deferatt.addCallback(self.resumeAtt)
             self.sendGet("chunk", [dname, nb, step])
             return
         cmd = attack(program, type, vars)
         if debug:
             print "    Command:", cmd
         else:
             print "octopus_sh$", cmd
         self.factory.ap = AttackProtocol(reg, self)
         ap = self.factory.ap
         reactor.spawnProcess(ap, cmd.split()[0], cmd.split(), {})
         self.deferred = defer.Deferred()
         self.deferred.addCallback(self.sendResult)
         if debug:
             print "    Attack started!"
     else:
         print "    Still busy... Please wait! :v"
         self.sendWait()
Code example #2
File: slavenode.py Project: hsc-lab/octopus
 def resumeAtt(self, chunk):
     """
     Resume a pending attack once the requested chunk has been received:
     write the chunk to a temporary file, then build and spawn the command.
     """
     with open("%s/tmp/chunk" % (home), "w") as f:
         chunk = chunk.encode("utf-8")
         f.write(chunk)
     reg = self.reg
     program = self.program
     type = self.type
     vars = self.vars
     cmd = attack(program, type, vars)
     if debug:
         print "    Command:", cmd
     else:
         print "octopus_sh$", cmd
     self.factory.ap = AttackProtocol(reg, self)
     ap = self.factory.ap
     reactor.spawnProcess(ap, cmd.split()[0], cmd.split(), {})
     self.deferred = defer.Deferred()
     self.deferred.addCallback(self.sendResult)
     if debug:
         print "    Attack started!"
Code example #3
def run_attack():
    # read the image as bytes
    in_img = request.files['image']

    # read additional json data
    # data['model'] contains the string name of the model
    data = json.loads(request.form['jsonData'])
    network = data['network']
    top = data['top']

    response = {}
    if data['model'].endswith(".json"):
        model_path = os.path.join(MODEL_DIRECTORY, data['model'])

        # run the attack on the image
        # arguments: the input image as an array of bytes, the path to the model to attack
        # returns: the modified image as a numpy array and as a base64-encoded JPEG
        # (the base64 form can be sent back via JSON)
        mod_img_np, mod_img_b64 = attack(_file_to_cv(in_img), model_path)
        predictions, class_codes = nnmodels.get_predictions(
            mod_img_np, network, top)

        # build and return the json response
        response = {
            "encoding": "data:image/jpeg;base64,",
            "img_base64": mod_img_b64,
            "mod_class_name": predictions[0][1],
            "mod_class_code": int(class_codes[0])
        }
    else:
        mod_img_np, mod_img_b64 = custom_attack(_file_to_cv(in_img),
                                                json.loads(data['model']))
        predictions, class_codes = nnmodels.get_predictions(
            mod_img_np, network, top)
        response = {
            "encoding": "data:image/jpeg;base64,",
            "img_base64": mod_img_b64,
            "mod_class_name": predictions[0][1],
            "mod_class_code": int(class_codes[0])
        }

    return jsonify(response)
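
A minimal client-side sketch of how this handler could be exercised. The route path and host are assumptions (the Flask decorator is not part of the excerpt), as is the model file name; the multipart fields mirror what run_attack reads: an 'image' file part and a 'jsonData' form field carrying 'model', 'network' and 'top'.

import json
import requests

payload = {
    "model": "resnet50.json",  # hypothetical model file under MODEL_DIRECTORY
    "network": "resnet50",
    "top": 5,
}
with open("input.jpg", "rb") as img:
    # 'image' ends up in request.files, 'jsonData' in request.form
    resp = requests.post("http://localhost:5000/run_attack",  # assumed URL
                         files={"image": img},
                         data={"jsonData": json.dumps(payload)})
print(resp.json()["mod_class_name"], resp.json()["mod_class_code"])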
Code example #4
def run_experiment(homoglyphs,
                   attack_type,
                   detector,
                   experiment_name,
                   data_file,
                   percent_change=None,
                   misspelling_dict=None,
                   throwout=False):

    start_time = time.time()

    out_path = './experimental_results/' + experiment_name + '/'
    adv_text_path = out_path + 'adv_texts/'
    numerical_results_path = out_path + 'results.txt'
    num_changes_path = out_path + 'num_changes.txt'

    print('Running Experiment: {} ...'.format(experiment_name))

    if not os.path.isdir(out_path):
        os.mkdir(out_path)
        os.mkdir(adv_text_path)

    text_list = load_json_file(data_file)

    _range = tqdm(range(len(text_list)))
    i = 0

    for _ in _range:

        text_to_use = detector.tokenizer.decode(
            detector.tokenizer.encode(
                text_list[i], max_length=detector.tokenizer.max_len))[3:-4]

        adv_text, num_changes = attack(text_to_use, homoglyphs, attack_type,
                                       percent_change, misspelling_dict,
                                       throwout)

        if throwout and (adv_text == text_to_use):
            pass

        else:

            write_txt(adv_text_path + str(i) + '.txt', adv_text)

            probs = detector.predict(adv_text)

            human_prob = probs[1]

            _range.set_description('{} | {}'.format(i, human_prob))

            with open(numerical_results_path, 'a') as f:
                f.write(str(human_prob) + ' ')

            with open(num_changes_path, 'a') as f:
                f.write(str(num_changes) + ' ')

        i += 1

    end_time = time.time()

    print('Time to complete experiment (minutes):',
          (end_time - start_time) / 60.)
Code example #5
def compute_td_loss(current_model, target_model, batch_size, replay_buffer,
                    per, use_cpp_buffer, use_async_rb, optimizer, gamma,
                    memory_mgr, robust, **kwargs):
    t = time.time()
    dtype = kwargs['dtype']
    if per:
        buffer_beta = kwargs['buffer_beta']
        if use_async_rb:
            if not replay_buffer.sample_available():
                replay_buffer.async_sample(batch_size, buffer_beta)
            res = replay_buffer.wait_sample()
            replay_buffer.async_sample(batch_size, buffer_beta)
        else:
            res = replay_buffer.sample(batch_size, buffer_beta)
        if use_cpp_buffer:
            state, action, reward, next_state, done, indices, weights = res[
                'obs'], res['act'], res['rew'], res['next_obs'], res[
                    'done'], res['indexes'], res['weights']
        else:
            state, action, reward, next_state, done, weights, indices = res[
                0], res[1], res[2], res[3], res[4], res[5], res[6]
    else:
        if use_async_rb:
            if replay_buffer.sample_available():
                replay_buffer.async_sample(batch_size)
            res = replay_buffer.wait_sample()
            replay_buffer.async_sample(batch_size)
        else:
            res = replay_buffer.sample(batch_size)
        if use_cpp_buffer:
            state, action, reward, next_state, done = res['obs'], res[
                'act'], res['rew'], res['next_obs'], res['done']
        else:
            state, action, reward, next_state, done = res[0], res[1], res[
                2], res[3], res[4]
    if use_cpp_buffer and not use_async_rb:
        action = action.transpose()[0].astype(int)
        reward = reward.transpose()[0].astype(int)
        done = done.transpose()[0].astype(int)
    log_time('sample_time', time.time() - t)

    t = time.time()
    # weights only exist when prioritized replay (per) is used; keep a numpy
    # copy for norm logging and default the norm to nan otherwise.
    weights_norm = np.nan
    if per:
        numpy_weights = weights
        state, next_state, action, reward, done, weights = memory_mgr.get_cuda_tensors(
            state, next_state, action, reward, done, weights)
    else:
        state, next_state, action, reward, done = memory_mgr.get_cuda_tensors(
            state, next_state, action, reward, done)

    bound_solver = kwargs.get('bound_solver', 'cov')
    optimizer.zero_grad()

    state = state.to(torch.float)
    next_state = next_state.to(torch.float)
    # Normalize input pixel to 0-1
    if dtype in UINTS:
        state /= 255
        next_state /= 255
        state_max = 1.0
        state_min = 0.0
    else:
        state_max = float('inf')
        state_min = float('-inf')
    beta = kwargs.get('beta', 0)

    if robust and bound_solver != 'pgd':
        cur_q_logits = current_model(state, method_opt="forward")
        tgt_next_q_logits = target_model(next_state, method_opt="forward")
    else:
        cur_q_logits = current_model(state)
        tgt_next_q_logits = target_model(next_state)
    if robust:
        eps = kwargs['eps']
    cur_q_value = cur_q_logits.gather(1, action.unsqueeze(1)).squeeze(1)

    tgt_next_q_value = tgt_next_q_logits.max(1)[0]
    expected_q_value = reward + gamma * tgt_next_q_value * (1 - done)
    '''
    # Merge two states into one batch
    state = state.to(torch.float)
    if dtype in UINTS:
        state /= 255
    state_and_next_state = torch.cat((state, next_state), 0)
    logits = current_model(state_and_next_state)
    cur_q_logits = logits[:state.size(0)]
    cur_next_q_logits = logits[state.size(0):]
    tgt_next_q_value  = tgt_next_q_logits.gather(1, torch.max(cur_next_q_logits, 1)[1].unsqueeze(1)).squeeze(1)
    '''

    if kwargs['natural_loss_fn'] == 'huber':
        loss_fn = torch.nn.SmoothL1Loss(reduction='none')
        loss = loss_fn(cur_q_value, expected_q_value.detach())
    else:
        loss = (cur_q_value - expected_q_value.detach()).pow(2)
    if per:
        loss = loss * weights
        prios = loss + 1e-5
        weights_norm = np.linalg.norm(numpy_weights)

    batch_cur_q_value = torch.mean(cur_q_value)
    batch_exp_q_value = torch.mean(expected_q_value)
    loss = loss.mean()
    td_loss = loss.clone()

    if robust:
        if eps < np.finfo(np.float32).tiny:
            reg_loss = torch.zeros(state.size(0))
            if USE_CUDA:
                reg_loss = reg_loss.cuda()
            if bound_solver == 'pgd':
                labels = torch.argmax(cur_q_logits, dim=1).clone().detach()
                adv_margin = ori_margin = logits_margin(
                    current_model.forward(state), labels)
                optimizer.zero_grad()
        else:
            if bound_solver != 'pgd':
                sa = kwargs.get('sa', None)
                pred = cur_q_logits
                labels = torch.argmax(pred, dim=1).clone().detach()
                c = torch.eye(current_model.num_actions).type_as(
                    state)[labels].unsqueeze(1) - torch.eye(
                        current_model.num_actions).type_as(state).unsqueeze(0)
                I = (~(labels.data.unsqueeze(1) == torch.arange(
                    current_model.num_actions).type_as(
                        labels.data).unsqueeze(0)))
                c = (c[I].view(state.size(0), current_model.num_actions - 1,
                               current_model.num_actions))
                sa_labels = sa[labels]
                lb_s = torch.zeros(state.size(0), current_model.num_actions)
                if USE_CUDA:
                    labels = labels.cuda()
                    c = c.cuda()
                    sa_labels = sa_labels.cuda()
                    lb_s = lb_s.cuda()
                env_id = kwargs.get('env_id', '')
                if env_id == 'Acrobot-v1':
                    eps_v = get_acrobot_eps(eps)
                    if USE_CUDA:
                        eps_v = eps_v.cuda()
                else:
                    eps_v = eps
                state_ub = torch.clamp(state + eps_v, max=state_max)
                state_lb = torch.clamp(state - eps_v, min=state_min)

                lb = get_logits_lower_bound(current_model, state, state_ub,
                                            state_lb, eps_v, c, beta)

                hinge = kwargs.get('hinge', False)
                if hinge:
                    reg_loss, _ = torch.min(lb, dim=1)
                    hinge_c = kwargs.get('hinge_c', 1)
                    reg_loss = torch.clamp(reg_loss, max=hinge_c)
                    reg_loss = -reg_loss
                else:
                    lb = lb_s.scatter(1, sa_labels, lb)
                    reg_loss = CrossEntropyLoss()(-lb, labels)
            else:
                labels = torch.argmax(cur_q_logits, dim=1).clone().detach()
                hinge_c = kwargs.get('hinge_c', 1)
                adv_state = attack(current_model, state,
                                   kwargs['attack_config'], logits_margin)
                optimizer.zero_grad()
                adv_margin = logits_margin(current_model.forward(adv_state),
                                           labels)
                ori_margin = logits_margin(current_model.forward(state),
                                           labels)
                reg_loss = torch.clamp(adv_margin, min=-hinge_c)

        if per:
            reg_loss = reg_loss * weights
        reg_loss = reg_loss.mean()
        kappa = kwargs['kappa']
        loss += kappa * reg_loss

    loss.backward()

    # Gradient clipping.
    grad_norm = 0.0
    max_norm = kwargs['grad_clip']
    if max_norm > 0:
        # parameters() returns a generator; materialize it so the second
        # loop (the actual clipping) still sees the parameters
        parameters = list(current_model.parameters())
        for p in parameters:
            grad_norm += p.grad.data.norm(2).item()**2
        grad_norm = np.sqrt(grad_norm)
        clip_coef = max_norm / (grad_norm + 1e-6)
        if clip_coef < 1:
            for p in parameters:
                p.grad.data.mul_(clip_coef)

    # update weights
    optimizer.step()

    nn_time = time.time() - t
    log_time('nn_time', time.time() - t)
    t = time.time()
    if per:
        replay_buffer.update_priorities(indices, prios.data.cpu().numpy())
    log_time('reweight_time', time.time() - t)

    res = (loss, grad_norm, weights_norm, td_loss, batch_cur_q_value,
           batch_exp_q_value)
    if robust:
        if bound_solver == 'pgd':
            res += (ori_margin, adv_margin)
        res += (reg_loss, )
    return res
Code example #6
def main(args):
    config = load_config(args)
    prefix = config['env_id']
    training_config = config['training_config']
    if config['name_suffix']:
        prefix += config['name_suffix']
    if config['path_prefix']:
        prefix = os.path.join(config['path_prefix'], prefix)
    if not os.path.exists(prefix):
        os.makedirs(prefix)

    train_log = os.path.join(prefix, 'train.log')
    logger = Logger(open(train_log, "w"))
    logger.log('Command line:', " ".join(sys.argv[:]))
    logger.log(args)
    logger.log(config)

    env_params = training_config['env_params']
    env_id = config['env_id']
    if "NoFrameskip" not in env_id:
        env = make_atari_cart(env_id)
    else:
        env = make_atari(env_id)
        env = wrap_deepmind(env, **env_params)
        env = wrap_pytorch(env)

    seed = training_config['seed']
    env.seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)

    state = env.reset()
    dtype = state.dtype
    logger.log("env_shape: {}, num of actions: {}".format(
        env.observation_space.shape, env.action_space.n))
    if "NoFrameskip" in env_id:
        logger.log('action meaning:',
                   env.unwrapped.get_action_meanings()[:env.action_space.n])

    robust = training_config.get('robust', False)
    adv_train = training_config.get('adv_train', False)
    bound_solver = training_config.get('bound_solver', 'cov')
    attack_config = {}
    if adv_train or bound_solver == 'pgd':
        test_config = config['test_config']
        attack_config = training_config["attack_config"]
        adv_ratio = training_config.get('adv_ratio', 1)
        if adv_train:
            logger.log('using adversarial examples for training, adv ratio:',
                       adv_ratio)
        else:
            logger.log('using pgd regularization training')
    if robust or adv_train:
        schedule_start = training_config['schedule_start']
        schedule_length = training_config['schedule_length']
        starting_epsilon = training_config['start_epsilon']
        end_epsilon = training_config['epsilon']
        epsilon_scheduler = EpsilonScheduler(
            training_config.get("schedule_type", "linear"), schedule_start,
            schedule_start + schedule_length - 1, starting_epsilon,
            end_epsilon, 1)
        max_eps = end_epsilon

    model_width = training_config['model_width']
    robust_model = robust and bound_solver != 'pgd'
    dueling = training_config.get('dueling', True)

    current_model = model_setup(env_id, env, robust_model, logger, USE_CUDA,
                                dueling, model_width)
    target_model = model_setup(env_id, env, robust_model, logger, USE_CUDA,
                               dueling, model_width)

    load_path = training_config["load_model_path"]
    if load_path != "" and os.path.exists(load_path):
        load_frame = int(re.findall('^.*frame_([0-9]+).pth$', load_path)[0])
        logger.log('\ntrain from model {}, current frame index is {}\n'.format(
            load_path, load_frame))
        current_model.features.load_state_dict(torch.load(load_path))
        target_model.features.load_state_dict(torch.load(load_path))
    else:
        logger.log('\ntrain from scratch')
        load_frame = 1

    lr = training_config['lr']
    grad_clip = training_config['grad_clip']
    natural_loss_fn = training_config['natural_loss_fn']
    optimizer = optim.Adam(current_model.parameters(),
                           lr=lr,
                           eps=training_config['adam_eps'])
    # Do not evaluate gradient for target model.
    for param in target_model.features.parameters():
        param.requires_grad = False

    buffer_config = training_config['buffer_params']
    replay_initial = buffer_config['replay_initial']
    buffer_capacity = buffer_config['buffer_capacity']
    use_cpp_buffer = training_config["cpprb"]
    use_async_rb = training_config['use_async_rb']
    num_frames = training_config['num_frames']
    batch_size = training_config['batch_size']
    gamma = training_config['gamma']

    if use_cpp_buffer:
        logger.log('using cpp replay buffer')
        if use_async_rb:
            replay_buffer_ctor = AsyncReplayBuffer(initial_state=state,
                                                   batch_size=batch_size)
        else:
            replay_buffer_ctor = cpprb.PrioritizedReplayBuffer
    else:
        logger.log('using python replay buffer')
    per = training_config['per']

    if per:
        logger.log('using prioritized experience replay.')
        alpha = buffer_config['alpha']
        buffer_beta_start = buffer_config['buffer_beta_start']
        buffer_beta_frames = buffer_config.get('buffer_beta_frames', -1)
        if buffer_beta_frames < replay_initial:
            buffer_beta_frames = num_frames - replay_initial
            logger.log('buffer_beta_frames reset to ', buffer_beta_frames)
        buffer_beta_scheduler = BufferBetaScheduler(buffer_beta_start,
                                                    buffer_beta_frames,
                                                    start_frame=replay_initial)
        if use_cpp_buffer:
            replay_buffer = replay_buffer_ctor(
                size=buffer_capacity,
                # env_dict={"obs": {"shape": state.shape, "dtype": np.uint8},
                env_dict={
                    "obs": {
                        "shape": state.shape,
                        "dtype": dtype
                    },
                    "act": {
                        "shape": 1,
                        "dtype": np.uint8
                    },
                    "rew": {},
                    # "next_obs": {"shape": state.shape, "dtype": np.uint8},
                    "next_obs": {
                        "shape": state.shape,
                        "dtype": dtype
                    },
                    "done": {}
                },
                alpha=alpha,
                eps=0.0)  # We add eps manually in training loop
        else:
            replay_buffer = PrioritizedReplayBuffer(buffer_capacity,
                                                    alpha=alpha)

    else:
        logger.log('using regular replay.')
        if use_cpp_buffer:
            replay_buffer = cpprb.ReplayBuffer(
                buffer_capacity,
                # {"obs": {"shape": state.shape, "dtype": np.uint8},
                {
                    "obs": {
                        "shape": state.shape,
                        "dtype": dtype
                    },
                    "act": {
                        "shape": 1,
                        "dtype": np.uint8
                    },
                    "rew": {},
                    # "next_obs": {"shape": state.shape, "dtype": np.uint8},
                    "next_obs": {
                        "shape": state.shape,
                        "dtype": dtype
                    },
                    "done": {}
                })
        else:
            replay_buffer = ReplayBuffer(buffer_capacity)

    update_target(current_model, target_model)

    act_epsilon_start = training_config['act_epsilon_start']
    act_epsilon_final = training_config['act_epsilon_final']
    act_epsilon_decay = training_config['act_epsilon_decay']
    act_epsilon_method = training_config['act_epsilon_method']
    if training_config.get('act_epsilon_decay_zero', True):
        decay_zero = num_frames
    else:
        decay_zero = None
    act_epsilon_scheduler = ActEpsilonScheduler(act_epsilon_start,
                                                act_epsilon_final,
                                                act_epsilon_decay,
                                                method=act_epsilon_method,
                                                start_frame=replay_initial,
                                                decay_zero=decay_zero)

    # Use optimized cuda memory management
    memory_mgr = CudaTensorManager(state.shape,
                                   batch_size,
                                   per,
                                   USE_CUDA,
                                   dtype=dtype)

    losses = []
    td_losses = []
    batch_cur_q = []
    batch_exp_q = []

    sa = None
    kappa = None
    hinge = False
    if robust:
        logger.log(
            'using convex relaxation certified classification loss as a regularization!'
        )
        kappa = training_config['kappa']
        reg_losses = []
        sa = np.zeros(
            (current_model.num_actions, current_model.num_actions - 1),
            dtype=np.int32)
        for i in range(sa.shape[0]):
            for j in range(sa.shape[1]):
                if j < i:
                    sa[i][j] = j
                else:
                    sa[i][j] = j + 1
        sa = torch.LongTensor(sa)
        hinge = training_config.get('hinge', False)
        logger.log('using hinge loss (default is cross entropy): ', hinge)

    if training_config['use_async_env']:
        # Create an environment in a separate process, run asynchronously
        async_env = AsyncEnv(env_id,
                             result_path=prefix,
                             draw=training_config['show_game'],
                             record=training_config['record_game'],
                             env_params=env_params,
                             seed=seed)

    # initialize parameters in logging
    all_rewards = []
    episode_reward = 0
    act_epsilon = np.nan
    grad_norm = np.nan
    weights_norm = np.nan
    best_test_reward = -float('inf')
    buffer_stored_size = 0
    if adv_train:
        attack_count = 0
        suc_count = 0
    if robust and bound_solver == 'pgd':
        ori_margin, adv_margin = np.nan, np.nan

    start_time = time.time()
    period_start_time = time.time()

    # Main Loop
    for frame_idx in range(load_frame, num_frames + 1):
        # Step 1: get current action
        frame_start = time.time()
        t = time.time()

        eps = 0
        if adv_train or robust:
            eps = epsilon_scheduler.get_eps(frame_idx, 0)

        act_epsilon = act_epsilon_scheduler.get(frame_idx)
        if adv_train and not np.isnan(eps) and eps >= np.finfo(np.float32).tiny:
            ori_state_tensor = torch.from_numpy(
                np.ascontiguousarray(state)).unsqueeze(0).cuda().to(
                    torch.float32)
            if dtype in UINTS:
                ori_state_tensor /= 255
            attack_config['params']['epsilon'] = eps
            if random.random() < adv_ratio:
                attack_count += 1
                state_tensor = attack(current_model, ori_state_tensor,
                                      attack_config)
                if current_model.act(state_tensor)[0] != current_model.act(
                        ori_state_tensor)[0]:
                    suc_count += 1
            else:
                state_tensor = ori_state_tensor
            action = current_model.act(state_tensor, act_epsilon)[0]
        else:
            with torch.no_grad():
                state_tensor = torch.from_numpy(
                    np.ascontiguousarray(state)).unsqueeze(0).cuda().to(
                        torch.float32)
                if dtype in UINTS:
                    state_tensor /= 255
                ori_state_tensor = torch.clone(state_tensor)
                action = current_model.act(state_tensor, act_epsilon)[0]

        # torch.cuda.synchronize()
        log_time('act_time', time.time() - t)

        # Step 2: run environment
        t = time.time()
        if training_config['use_async_env']:
            async_env.async_step(action)
        else:
            next_state, reward, done, _ = env.step(action)
        log_time('env_time', time.time() - t)

        # Step 3: save to buffer
        # For asynchronous env, defer saving
        if not training_config['use_async_env']:
            t = time.time()
            if use_cpp_buffer:
                replay_buffer.add(obs=state,
                                  act=action,
                                  rew=reward,
                                  next_obs=next_state,
                                  done=done)
            else:
                replay_buffer.push(state, action, reward, next_state, done)
            log_time('save_time', time.time() - t)

        if use_cpp_buffer:
            buffer_stored_size = replay_buffer.get_stored_size()
        else:
            buffer_stored_size = len(replay_buffer)

        beta = np.nan
        buffer_beta = np.nan
        t = time.time()

        if buffer_stored_size > replay_initial:
            if training_config['per']:
                buffer_beta = buffer_beta_scheduler.get(frame_idx)
            if robust:
                convex_final_beta = training_config['convex_final_beta']
                convex_start_beta = training_config['convex_start_beta']
                beta = (
                    max_eps - eps *
                    (1.0 - convex_final_beta)) / max_eps * convex_start_beta

            res = compute_td_loss(current_model,
                                  target_model,
                                  batch_size,
                                  replay_buffer,
                                  per,
                                  use_cpp_buffer,
                                  use_async_rb,
                                  optimizer,
                                  gamma,
                                  memory_mgr,
                                  robust,
                                  buffer_beta=buffer_beta,
                                  grad_clip=grad_clip,
                                  natural_loss_fn=natural_loss_fn,
                                  eps=eps,
                                  beta=beta,
                                  sa=sa,
                                  kappa=kappa,
                                  dtype=dtype,
                                  hinge=hinge,
                                  hinge_c=training_config.get('hinge_c', 1),
                                  env_id=env_id,
                                  bound_solver=bound_solver,
                                  attack_config=attack_config)
            loss, grad_norm, weights_norm, td_loss, batch_cur_q_value, batch_exp_q_value = res[
                0], res[1], res[2], res[3], res[4], res[5]
            if robust:
                reg_loss = res[-1]
                reg_losses.append(reg_loss.data.item())
                if bound_solver == 'pgd':
                    ori_margin, adv_margin = res[-3].data.item(
                    ), res[-2].data.item()

            losses.append(loss.data.item())
            td_losses.append(td_loss.data.item())
            batch_cur_q.append(batch_cur_q_value.data.item())
            batch_exp_q.append(batch_exp_q_value.data.item())

        log_time('loss_time', time.time() - t)

        # Step 2: run environment (async)
        t = time.time()
        if training_config['use_async_env']:
            next_state, reward, done, _ = async_env.wait_step()
        log_time('env_time', time.time() - t)

        # Step 3: save to buffer (async)
        if training_config['use_async_env']:
            t = time.time()
            if use_cpp_buffer:
                replay_buffer.add(obs=state,
                                  act=action,
                                  rew=reward,
                                  next_obs=next_state,
                                  done=done)
            else:
                replay_buffer.push(state, action, reward, next_state, done)
            log_time('save_time', time.time() - t)

        # Update states and reward
        t = time.time()
        state = next_state
        episode_reward += reward
        if done:
            if training_config['use_async_env']:
                state = async_env.reset()
            else:
                state = env.reset()
            all_rewards.append(episode_reward)
            episode_reward = 0
        log_time('env_time', time.time() - t)

        # All kinds of result logging
        if frame_idx % training_config[
                'print_frame'] == 0 or frame_idx == num_frames or (
                    robust and abs(frame_idx - schedule_start) < 5
                ) or abs(buffer_stored_size - replay_initial) < 5:
            logger.log(
                '\nframe {}/{}, learning rate: {:.6g}, buffer beta: {:.6g}, action epsilon: {:.6g}'
                .format(frame_idx, num_frames, lr, buffer_beta, act_epsilon))
            logger.log(
                'total time: {:.2f}, epoch time: {:.4f}, speed: {:.2f} frames/sec, last total loss: {:.6g}, avg total loss: {:.6g}, grad norm: {:.6g}, weights_norm: {:.6g}, latest episode reward: {:.6g}, avg 10 episode reward: {:.6g}'
                .format(
                    time.time() - start_time,
                    time.time() - period_start_time,
                    training_config['print_frame'] /
                    (time.time() - period_start_time),
                    losses[-1] if losses else np.nan,
                    np.average(losses[:-training_config['print_frame'] -
                                      1:-1]) if losses else np.nan, grad_norm,
                    weights_norm, all_rewards[-1] if all_rewards else np.nan,
                    np.average(all_rewards[:-11:-1])
                    if all_rewards else np.nan))
            logger.log('last td loss: {:.6g}, avg td loss: {:.6g}'.format(
                td_losses[-1] if td_losses else np.nan,
                np.average(td_losses[:-training_config['print_frame'] -
                                     1:-1]) if td_losses else np.nan))
            logger.log(
                'last batch cur q: {:.6g}, avg batch cur q: {:.6g}'.format(
                    batch_cur_q[-1] if batch_cur_q else np.nan,
                    np.average(batch_cur_q[:-training_config['print_frame'] -
                                           1:-1]) if batch_cur_q else np.nan))
            logger.log(
                'last batch exp q: {:.6g}, avg batch exp q: {:.6g}'.format(
                    batch_exp_q[-1] if batch_exp_q else np.nan,
                    np.average(batch_exp_q[:-training_config['print_frame'] -
                                           1:-1]) if batch_exp_q else np.nan))
            if robust:
                logger.log('current input epsilon: {:.6g}'.format(eps))
                if bound_solver == 'pgd':
                    logger.log(
                        'last logit margin: ori: {:.6g}, adv: {:.6g}'.format(
                            ori_margin, adv_margin))
                else:
                    logger.log('current bound beta: {:.6g}'.format(beta))
                logger.log(
                    'last cert reg loss: {:.6g}, avg cert reg loss: {:.6g}'.
                    format(
                        reg_losses[-1] if reg_losses else np.nan,
                        np.average(
                            reg_losses[:-training_config['print_frame'] -
                                       1:-1]) if reg_losses else np.nan))
                logger.log('current kappa: {:.6g}'.format(kappa))
            if adv_train:
                logger.log(
                    'current attack epsilon (same as input epsilon): {:.6g}'.
                    format(eps))
                diff = ori_state_tensor - state_tensor
                diff = np.abs(diff.data.cpu().numpy())
                logger.log('current Linf distortion: {:.6g}'.format(
                    np.max(diff)))
                logger.log(
                    'this batch attacked: {}, success: {}, attack success rate: {:.6g}'
                    .format(
                        attack_count, suc_count, suc_count * 1.0 /
                        attack_count if attack_count > 0 else np.nan))
                attack_count = 0
                suc_count = 0
                logger.log('attack stats reset.')

            period_start_time = time.time()
            log_time.print()
            log_time.clear()

        if frame_idx % training_config[
                'save_frame'] == 0 or frame_idx == num_frames:
            plot(frame_idx, all_rewards, losses, prefix)
            torch.save(current_model.features.state_dict(),
                       '{}/frame_{}.pth'.format(prefix, frame_idx))

        if frame_idx % training_config['update_target_frame'] == 0:
            update_target(current_model, target_model)

        if frame_idx % training_config.get('mini_test', 100000) == 0 and (
            (robust and beta == 0) or
            (not robust and frame_idx * 1.0 / num_frames >= 0.8)):
            test_reward = mini_test(current_model, config, logger, dtype)
            logger.log('this test avg reward: {:.6g}'.format(test_reward))
            if test_reward >= best_test_reward:
                best_test_reward = test_reward
                logger.log(
                    'new best reward {:.6g} achieved, update checkpoint'.format(
                        test_reward))
                torch.save(current_model.features.state_dict(),
                           '{}/best_frame_{}.pth'.format(prefix, frame_idx))

        log_time.log_time('total', time.time() - frame_start)
Code example #7
File: test.py Project: zijianh4/SA_DQN
def main(args):
    config = load_config(args)
    prefix = config['env_id']
    training_config = config['training_config']
    test_config = config['test_config']
    attack_config = test_config["attack_config"]
    if config['name_suffix']:
        prefix += config['name_suffix']
    if config['path_prefix']:
        prefix = os.path.join(config['path_prefix'], prefix)
    if 'load_model_path' in test_config and os.path.isfile(
            test_config['load_model_path']):
        if not os.path.exists(prefix):
            os.makedirs(prefix)
        test_log = os.path.join(prefix, test_config['log_name'])
    else:
        if os.path.exists(prefix):
            test_log = os.path.join(prefix, test_config['log_name'])
        else:
            raise ValueError(
                'Path {} does not exist, please specify a test model path.'.format(
                    prefix))
    logger = Logger(open(test_log, "w"))
    logger.log('Command line:', " ".join(sys.argv[:]))
    logger.log(args)
    logger.log(config)
    certify = test_config.get('certify', False)
    env_params = training_config['env_params']
    env_params['clip_rewards'] = False
    env_params['episode_life'] = False
    env_id = config['env_id']

    if "NoFrameskip" not in env_id:
        env = make_atari_cart(env_id)
    else:
        env = make_atari(env_id)
        env = wrap_deepmind(env, **env_params)
        env = wrap_pytorch(env)

    state = env.reset()
    dtype = state.dtype
    logger.log("env_shape: {}, num of actions: {}".format(
        env.observation_space.shape, env.action_space.n))

    model_width = training_config['model_width']
    robust_model = certify
    dueling = training_config.get('dueling', True)

    model = model_setup(env_id, env, robust_model, logger, USE_CUDA, dueling,
                        model_width)

    if 'load_model_path' in test_config and os.path.isfile(
            test_config['load_model_path']):
        model_path = test_config['load_model_path']
    else:
        logger.log("choosing the best model from " + prefix)
        all_idx = [
            int(f[6:-4]) for f in os.listdir(prefix)
            if os.path.isfile(os.path.join(prefix, f))
            and os.path.splitext(f)[1] == '.pth' and 'best' not in f
        ]
        all_best_idx = [
            int(f[11:-4]) for f in os.listdir(prefix)
            if os.path.isfile(os.path.join(prefix, f))
            and os.path.splitext(f)[1] == '.pth' and 'best' in f
        ]
        if all_best_idx:
            model_frame_idx = max(all_best_idx)
            model_name = 'best_frame_{}.pth'.format(model_frame_idx)
        else:
            model_frame_idx = max(all_idx)
            model_name = 'frame_{}.pth'.format(model_frame_idx)
        model_path = os.path.join(prefix, model_name)

    logger.log('model loaded from ' + model_path)
    model.features.load_state_dict(torch.load(model_path))
    num_episodes = test_config['num_episodes']
    max_frames_per_episode = test_config['max_frames_per_episode']

    all_rewards = []
    episode_reward = 0

    seed = random.randint(0, sys.maxsize)
    logger.log('resetting env with seed', seed)
    env.seed(seed)
    state = env.reset()
    start_time = time.time()
    if training_config['use_async_env']:
        # Create an environment in a separate process, run asynchronously
        async_env = AsyncEnv(env_id,
                             result_path=prefix,
                             draw=training_config['show_game'],
                             record=training_config['record_game'],
                             save_frames=test_config['save_frames'],
                             env_params=env_params,
                             seed=args.seed)

    episode_idx = 1
    this_episode_frame = 1

    if certify:
        certified = 0

    if dtype in UINTS:
        state_max = 1.0
        state_min = 0.0
    else:
        state_max = float('inf')
        state_min = float('-inf')

    for frame_idx in range(1, num_episodes * max_frames_per_episode + 1):

        state_tensor = torch.from_numpy(
            np.ascontiguousarray(state)).unsqueeze(0).cuda().to(torch.float32)
        # Normalize input pixel to 0-1
        if dtype in UINTS:
            state_tensor /= 255

        if test_config['attack']:
            attack_config['params']['robust_model'] = certify
            state_tensor = attack(model, state_tensor, attack_config)

        if certify:
            beta = training_config.get('convex_final_beta', 0)
            eps = attack_config['params']['epsilon']
            if env_id == 'Acrobot-v1':
                eps_v = get_acrobot_eps(eps)
                if USE_CUDA:
                    eps_v = eps_v.cuda()
            else:
                eps_v = eps
            state_ub = torch.clamp(state_tensor + eps_v, max=state_max)
            state_lb = torch.clamp(state_tensor - eps_v, min=state_min)

        action = model.act(state_tensor)[0]

        if certify:
            max_logit = torch.tensor([action])
            c = torch.eye(model.num_actions).type_as(
                state_tensor)[max_logit].unsqueeze(1) - torch.eye(
                    model.num_actions).type_as(state_tensor).unsqueeze(0)
            I = (~(max_logit.data.unsqueeze(1) == torch.arange(
                model.num_actions).type_as(max_logit.data).unsqueeze(0)))
            c = (c[I].view(state_tensor.size(0), model.num_actions - 1,
                           model.num_actions))
            logits_diff_lb = get_logits_lower_bound(model, state_tensor,
                                                    state_ub, state_lb, eps_v,
                                                    c, beta)
            if torch.min(logits_diff_lb[0], 0)[0].data.cpu().numpy() > 0:
                certified += 1

        if training_config['use_async_env']:
            async_env.async_step(action)
            next_state, reward, done, _ = async_env.wait_step()
        else:
            next_state, reward, done, _ = env.step(action)

        state = next_state
        episode_reward += reward

        if frame_idx % test_config['print_frame'] == 0:
            logger.log(
                '\ntotal frame {}/{}, episode {}/{}, episode frame {}/{}, latest episode reward: {:.6g}, avg 10 episode reward: {:.6g}'
                .format(
                    frame_idx, num_episodes * max_frames_per_episode,
                    episode_idx, num_episodes, this_episode_frame,
                    max_frames_per_episode,
                    all_rewards[-1] if all_rewards else np.nan,
                    np.average(all_rewards[:-11:-1])
                    if all_rewards else np.nan))
            if certify:
                logger.log(
                    'certified action: {}, certified action ratio: {:.6g}'.
                    format(certified, certified * 1.0 / frame_idx))

        if this_episode_frame == max_frames_per_episode:
            logger.log(
                'maximum number of frames reached in this episode, reset environment!'
            )
            done = True
            if training_config['use_async_env']:
                async_env.epi_reward = 0

        if done:
            logger.log('resetting env with seed', seed)
            if training_config['use_async_env']:
                state = async_env.reset()
            else:
                state = env.reset()
            all_rewards.append(episode_reward)
            episode_reward = 0
            this_episode_frame = 1
            episode_idx += 1
            if episode_idx > num_episodes:
                break
        else:
            this_episode_frame += 1

    logger.log('\navg reward' + (' and avg certify:' if certify else ':'))
    logger.log(np.mean(all_rewards), '+-', np.std(all_rewards))
    if certify:
        logger.log(certified * 1.0 / frame_idx)
Code example #8
def cli_main(args=None):
    from pl_bolts.datamodules import CIFAR10DataModule, MNISTDataModule

    pl.seed_everything()

    parser = ArgumentParser()
    parser.add_argument("--dataset",
                        default="mnist",
                        type=str,
                        choices=["cifar10", "mnist"])
    parser.add_argument("--test", action='store_true', help='use test')
    parser.add_argument("--attack", action='store_true', help='test attack')
    parser.add_argument("--eps", type=float, default=0.1)
    parser.add_argument("--checkpoint", default=None, type=str)
    parser.add_argument("--pretrained", default=None, type=str)
    parser.add_argument("--name", default='', type=str)
    parser.add_argument("--log_graph",
                        action='store_true',
                        help='log computational graph to tensorboard')
    script_args, _ = parser.parse_known_args(args)

    if script_args.dataset == "cifar10":
        dm_cls = CIFAR10DataModule
    elif script_args.dataset == "mnist":
        dm_cls = MNISTDataModule
    else:
        raise ValueError(f"undefined dataset {script_args.dataset}")

    parser = VAE.add_model_specific_args(parser)
    parser = pl.Trainer.add_argparse_args(parser)
    args = parser.parse_args(args)

    dm = dm_cls.from_argparse_args(args)
    args.input_height = dm.size()[-1]

    if args.max_steps == -1:
        args.max_steps = None

    if args.test:
        model = VAE.load_from_checkpoint(args.checkpoint)
        trainer = pl.Trainer.from_argparse_args(args, logger=False)
        trainer.test(model, datamodule=dm)
    elif args.attack:
        from attacks import attack
        dm.setup('test')
        model = VAE.load_from_checkpoint(args.checkpoint)
        attack(model, dm.test_dataloader(), eps=args.eps)
    else:
        if args.pretrained:
            model = VAE(**vars(args)).from_pretrained(args.pretrained)
        else:
            model = VAE(**vars(args))
        logging_dir = args.dataset + 'logs/'
        from pytorch_lightning import loggers as pl_loggers
        logger_name = args.vae_type + '_' + args.name if len(
            args.name) > 0 else args.vae_type
        tb_logger = pl_loggers.TensorBoardLogger(args.dataset + '_tblogs/',
                                                 name=logger_name,
                                                 log_graph=args.log_graph)
        trainer = pl.Trainer.from_argparse_args(
            args,
            logger=tb_logger,
            default_root_dir=logging_dir,
            callbacks=[EarlyStopping(monitor='val_loss', patience=5)])
        trainer.fit(model, datamodule=dm)
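
A hedged usage note for the example above: cli_main accepts an explicit argument list, so the attack path can be driven programmatically; the checkpoint path below is only a placeholder.

# minimal sketch; 'vae.ckpt' is a placeholder checkpoint path
cli_main(["--dataset", "mnist", "--attack", "--eps", "0.1",
          "--checkpoint", "vae.ckpt"])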
Code example #9
else:
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum)

best_valid_loss = np.inf
iteration = 0
epoch = 1

if args.test_mode:
    checkpoint = torch.load(args.chkpt_path)
    model.load_state_dict(checkpoint['net'].state_dict())
    # optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    model.eval()

# attack model
attack(test_loader, 0, model)

# training with early stopping
while (epoch < args.epochs + 1) and (iteration < args.patience) and not args.test_mode:
    train(train_loader, model, optimizer, epoch, args.cuda, args.log_interval)
    # attack model
    attack(test_loader, 0, model)
    valid_loss = test(valid_loader, model, args.cuda)
    if valid_loss > best_valid_loss:
        iteration += 1
        print('Loss was not improved, iteration {0}'.format(str(iteration)))
    else:
        print('Saving model...')
        iteration = 0
        best_valid_loss = valid_loss
        state = {
Code example #10
File: graph_simulation.py Project: joker-ace/vksgui
def run(args):
    return attack(*args)
Code example #11
File: graph_simulation.py Project: joker-ace/vksgui
#    graph)  # print info about the graph
#getPartitions(graph)
#edgesInCommunitiesGraph('ektor.txt','partitions.txt')
#G = nx.read_adjlist('adj_list.txt', delimiter=' ')
#G = nx.convert_node_labels_to_integers(graph)
#draw(G)
#G = nx.read_adjlist('community.txt', delimiter=' ')
#components = nx.connected_component_subgraphs(G)
#edgesInGroup('txtGraphs/samorodok_art_group_members_with_friends.txt')
#allFiles('kis.txt')
def run(args):
    return attack(*args)

if __name__ == '__main__':
    a = '''
    start = time.time()#
    print 'Starting attacks.'
    m.freeze_support()
    pool = m.Pool(2)#parallel modeling
    results = (pool.map(run,[(scaleFree, 'random'),(scaleFree, 'target')]))#
    pool.close()
    pool.join()
    print 'time: %.4f minutes'%((float(time.time())- float(start))/60.0)#print time''
    print ("Percolation threshold - %f" %results[1][1])
    printTargets(results[1][1], results[1][6])#print targets to file
    plot(results)#visualization of results'''

    results = attack(graph,'target')
    printTargets(results[1], results[6])
    #plot(results)#'''
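
The commented-out block preserved in the string above shows how run is meant to be used: multiprocessing.Pool.map passes a single argument to the worker, so run unpacks the tuple before calling attack. A minimal Python 3 sketch of that pattern, assuming a graph object named scaleFree as in the original comments:

import multiprocessing as m

if __name__ == '__main__':
    m.freeze_support()
    pool = m.Pool(2)  # model the two attack strategies in parallel
    # each tuple is unpacked by run into attack(scaleFree, 'random') / attack(scaleFree, 'target')
    results = pool.map(run, [(scaleFree, 'random'), (scaleFree, 'target')])
    pool.close()
    pool.join()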