def attack(input_img, len_x, target_txt, pert_type='2'):
    target_index_list = [
        np.asarray([c for c in encode(t)]) for t in target_txt
    ]
    with graph.as_default():
        adv_img = input_img.copy()
        m0 = np.zeros(input_img.shape)
        record_iter = np.zeros(input_img.shape[0])  # 0 means not yet successful

        start = time.time()
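        # Each outer iteration takes one scaled-gradient step per batch, then
        # re-decodes every example to check whether it already reads as the target.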
        for i in tqdm(range(nb_iter)):
            # perform attack
            batch_iter = (len(input_img) + batch_size - 1) // batch_size  # ceil division
            for batch_i in range(batch_iter):
                batch_input_img = input_img[batch_size * batch_i:batch_size *
                                            (batch_i + 1)]
                batch_adv_img = adv_img[batch_size * batch_i:batch_size *
                                        (batch_i + 1)]
                batch_len_x = len_x[batch_size * batch_i:batch_size *
                                    (batch_i + 1)]
                batch_m0 = m0[batch_size * batch_i:batch_size * (batch_i + 1)]
                batch_target_txt = target_txt[batch_size * batch_i:batch_size *
                                              (batch_i + 1)]
                batch_tmp_y = [
                    np.asarray([c - 1 for c in encode(t)])
                    for t in batch_target_txt
                ]
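                # build the CTC targets as a sparse-tensor tuple (indices, values, dense_shape)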
                batch_y = sparse_tuple_from(batch_tmp_y)
                batch_record_iter = record_iter[batch_size *
                                                batch_i:batch_size *
                                                (batch_i + 1)]

                # pick the perturbation op for the requested norm and evaluate it on this batch
                scaled_perturbation = (scaled_perturbation_2
                                       if pert_type == '2' else
                                       scaled_perturbation_inf)
                batch_pert = sess.run(
                    scaled_perturbation,  # pert type
                    feed_dict={
                        inputs: batch_adv_img,
                        input_seq_len: batch_len_x,
                        m: batch_m0,
                        targets: batch_y,
                        dropout_rate: 0,
                    })
                batch_pert[batch_pert > 0] = 0  # keep only non-positive perturbation components
                batch_pert[batch_record_iter != 0] = 0  # freeze examples that already succeeded
                batch_adv_img = batch_adv_img + eps_iter * batch_pert
                # project back into the eps-ball around the clean image, then into the valid pixel range
                batch_adv_img = batch_input_img + np.clip(
                    batch_adv_img - batch_input_img, -eps, eps)
                batch_adv_img = np.clip(batch_adv_img, clip_min, clip_max)
                adv_img[batch_size * batch_i:batch_size *
                        (batch_i + 1)] = batch_adv_img

            record_adv_text = []
            # check whether attack success
            for batch_i in range(batch_iter):
                batch_adv_img = adv_img[batch_size * batch_i:batch_size *
                                        (batch_i + 1)]
                batch_len_x = len_x[batch_size * batch_i:batch_size *
                                    (batch_i + 1)]
                batch_target_index = target_index_list[batch_size *
                                                       batch_i:batch_size *
                                                       (batch_i + 1)]
                batch_adv_text = sess.run(decoded,
                                          feed_dict={
                                              inputs: batch_adv_img,
                                              input_seq_len: batch_len_x,
                                              dropout_rate: 0,
                                          })
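                # convert the sparse CTC decoder output back to per-example index lists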
                batch_adv_index = TensorflowModel._TensorflowModel__sparse_to_lists(
                    batch_adv_text)
                record_adv_text += [
                    ''.join(decode(index)) for index in batch_adv_index
                ]
                for j in range(len(batch_target_index)):
                    # attack img idx_j successfully at iter i
                    idx_j = batch_size * batch_i + j
                    adv_index, target_index = batch_adv_index[
                        j], batch_target_index[j]
                    if np.array_equal(adv_index,
                                      target_index) and record_iter[idx_j] == 0:
                        record_iter[idx_j] = i
            # check whether all examples are successful
            if np.sum(record_iter == 0) == 0:
                break

        duration = time.time() - start
        print(f"{i} break. Time cost {duration:.4f} s")
    return adv_img, record_adv_text, record_iter, (duration, i)
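
# Illustrative usage sketch (assumption, not part of the original script): the
# sample-loading helper and its path below are hypothetical; `attack`, `encode`,
# and the session/graph globals are the ones defined in this module.
#
#     input_img, len_x = load_samples('data/samples')       # hypothetical loader
#     target_txt = ['hello'] * len(input_img)                # one target string per image
#     adv_img, adv_text, record_iter, (duration, last_iter) = attack(
#         input_img, len_x, target_txt, pert_type='2')
#     success_rate = np.mean(record_iter != 0)               # 0 still means "not successful"
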
        # overall best results kept across all binary-search steps
        o_bestscore = [-1] * batch_size  # (batch_size, )
        o_bestattack = np.zeros(shape)
        o_bestiter = [-1] * batch_size

        for outer_step in range(BINARY_SEARCH_STEPS):  # binary search over const
            # completely reset adam's internal state.
            sess.run(init)
            batch = imgs[i:i + batch_size]
            batch_len_x = len_x[i:i + batch_size]
            batch_wm_mask = wm_mask_img[i:i + batch_size]
            batch_target_txt = target_txt[i:i + batch_size]
            batch_tmp_y = [
                np.asarray([c - 1 for c in encode(t)])
                for t in batch_target_txt
            ]
            batch_y = sparse_tuple_from(batch_tmp_y)

            # best results found within this binary-search step
            bestl2 = [1e10] * batch_size  # (batch_size, )
            bestscore = [-1] * batch_size  # (batch_size, )
            bestiter = [-1] * batch_size
            bestasr = [-1] * MAX_ITERATIONS
            # print(f"Binary search step {outer_step} of {BINARY_SEARCH_STEPS}")

            # set the variables so that we don't have to send them over again
            sess.run(
                setup, {
                    assign_timg: batch,
                    assign_input_seq_len: batch_len_x,
                    assign_wm_mask: batch_wm_mask,
                    assign_const: CONST,
                })
    batch_iter = (len(input_img) + batch_size - 1) // batch_size  # ceil division
    start = time.time()
    for batch_i in tqdm(range(batch_iter)):
        batch_input_img = input_img[batch_size * batch_i:batch_size *
                                    (batch_i + 1)]
        batch_adv_img = adv_img[batch_size * batch_i:batch_size *
                                (batch_i + 1)]
        batch_len_x = len_x[batch_size * batch_i:batch_size * (batch_i + 1)]
        batch_m0 = m0[batch_size * batch_i:batch_size * (batch_i + 1)]
        batch_target_text = target_txt[batch_size * batch_i:batch_size *
                                       (batch_i + 1)]
        batch_target_index = [
            np.asarray([c - 1 for c in encode(t)]) for t in batch_target_text
        ]
        batch_y = sparse_tuple_from(batch_target_index)
        batch_record_iter = record_iter[batch_size * batch_i:batch_size *
                                        (batch_i + 1)]

        scaled_perturbation = (scaled_perturbation_2
                               if pert_type == '2' else scaled_perturbation_inf)

        batch_record_iter = np.zeros(batch_size)
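        # run up to nb_iter attack steps on this batch, fetching the perturbation
        # and the current decoding in a single session call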
        for i in range(nb_iter):
            batch_pert, batch_adv_text = sess.run(
                [scaled_perturbation, decoded],  # pert type
                feed_dict={
                    inputs: batch_adv_img,
                    input_seq_len: batch_len_x,
                    m: batch_m0,
                    targets: batch_y,
                    dropout_rate: 0,