Example #1
def main(args):
  """Main function which runs worker."""
  title = '## Starting evaluation of round {0} ##'.format(args.round_name)
  logging.info('\n'
               + '#' * len(title) + '\n'
               + '#' * len(title) + '\n'
               + '##' + ' ' * (len(title)-4) + '##' + '\n'
               + title + '\n'
               + '##' + ' ' * (len(title)-4) + '##' + '\n'
               + '#' * len(title) + '\n'
               + '#' * len(title) + '\n')
  if args.blacklisted_submissions:
    logging.warning('BLACKLISTED SUBMISSIONS: %s',
                    args.blacklisted_submissions)
  random.seed()
  logging.info('Running nvidia-docker to ensure that GPU works')
  shell_call(['docker', 'run', '--runtime=nvidia',
              '--rm', 'nvidia/cuda', 'nvidia-smi'])
  eval_worker = EvaluationWorker(
      worker_id=args.worker_id,
      storage_client=eval_lib.CompetitionStorageClient(
          args.project_id, args.storage_bucket),
      datastore_client=eval_lib.CompetitionDatastoreClient(
          args.project_id, args.round_name),
      storage_bucket=args.storage_bucket,
      round_name=args.round_name,
      dataset_name=args.dataset_name,
      blacklisted_submissions=args.blacklisted_submissions,
      num_defense_shards=args.num_defense_shards)
  eval_worker.run_work()
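
Every example on this page calls shell_call from cleverhans_copy.utils. A minimal stand-in is sketched below, assuming it is a thin wrapper around subprocess.check_output that substitutes '${NAME}' tokens from keyword arguments (Example #9 relies on this when passing src= and dst=); the real implementation may differ in details.

import logging
import re
import subprocess

CMD_VARIABLE_RE = re.compile(r'^\$\{(\w+)\}$')


def shell_call(command, **kwargs):
  """Runs a command given as a list of tokens and returns its output.

  Tokens of the form '${NAME}' are replaced by kwargs['NAME'] before running.
  Raises subprocess.CalledProcessError if the command exits with a non-zero
  status.
  """
  command = list(command)
  for i, token in enumerate(command):
    match = CMD_VARIABLE_RE.match(token)
    if match and match.group(1) in kwargs:
      command[i] = kwargs[match.group(1)]
  logging.debug('Executing shell command: %s', ' '.join(command))
  return subprocess.check_output(command)
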
Example #2
  def read_dataset_metadata(self):
    """Loads dataset metadata from the storage bucket unless already loaded."""
    if self.dataset_meta:
      return
    shell_call(['gsutil', 'cp',
                'gs://' + self.storage_client.bucket_name + '/'
                + 'dataset/' + self.dataset_name + '_dataset.csv',
                LOCAL_DATASET_METADATA_FILE])
    with open(LOCAL_DATASET_METADATA_FILE, 'r') as f:
      self.dataset_meta = eval_lib.DatasetMetadata(f)
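
For reference, the same download could be done without shelling out to gsutil. The sketch below uses the google-cloud-storage Python client; the function name and the standalone bucket_name/dataset_name/local_path parameters are illustrative, not part of the worker's API:

from google.cloud import storage


def download_dataset_metadata(bucket_name, dataset_name, local_path):
  """Downloads dataset/<dataset_name>_dataset.csv from GCS to local_path."""
  client = storage.Client()
  bucket = client.bucket(bucket_name)
  blob = bucket.blob('dataset/{0}_dataset.csv'.format(dataset_name))
  blob.download_to_filename(local_path)
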
Example #3
def test_format_pep8():
    """
  Test if pep8 is respected.
  """
    files_to_check = []
    module_dir = cleverhans_copy.__path__[0]
    for path in list_files(".py"):
        rel_path = os.path.relpath(path, module_dir)
        if rel_path in whitelist_pep8:
            continue
        else:
            files_to_check.append(path)
    repo_dir = os.path.join(module_dir, os.pardir)
    rcpath = os.path.join(repo_dir, '.pylintrc')
    assert os.path.exists(rcpath)

    # We must run pylint via the command line and subprocess because of
    # problems with the pylint module.
    # The documentation claims you can run it as a python module, but
    # the documentation is wrong: https://github.com/PyCQA/pylint/issues/1870
    # If you run the version described in the linked issue, pylint
    # calls sys.exit once it is done, so it kills the test.

    # Running all files in one pylint command is important for 2 reasons:
    # 1) Correctness: pylint can detect issues that require access to multiple
    #    files, such as cyclic imports
    # 2) Speed: pylint imports modules for deep analysis, so if you run
    #    multiple subprocesses each needs to re-import tensorflow.
    # On Ian's laptop, pylint takes about 10s per file to run on the repo,
    # and there are about 90 files as of the writing of this comment.
    # Running pylint on all files simultaneously takes about 70s, so it
    # is a little better than a 10X speedup.

    # Running multiple jobs in parallel helps but far less than linearly.
    # On Ian's 4-core laptop, running 4 jobs drops the runtime from 70s
    # to 45s.
    # Some of the work is I/O, so it actually makes some sense to run
    # more jobs than cores. On Ian's 4-core laptop, running 8 jobs drops
    # the runtime to 40s.
    # There's a further complication though: I think each job needs to
    # redo imports, so the total amount of work to do increases with
    # the number of jobs. On Ian's laptop, using 64 jobs causes the
    # runtime to increase to 220s. There is not an obvious simple
    # formula like "use one job per CPU core" or "use way more jobs
    # than cores to saturate I/O". For now I'm hoping that 8 will be
    # a reasonable default: it gets good performance on my laptop,
    # and on machines with fewer than 4 cores there should still be
    # a benefit to not being blocked on I/O.

    try:
        shell_call(['pylint', '--rcfile', rcpath, '--jobs', '8'] +
                   files_to_check)
    except subprocess.CalledProcessError as e:
        raise ValueError(e.output.decode("utf-8"))
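
Following the comments above, the hard-coded '--jobs', '8' could also be derived from the machine. An illustrative variation (not part of the original test), reusing rcpath and files_to_check from the test body:

import multiprocessing

# Roughly two jobs per core helps the I/O-bound parts, but cap at 8 so the
# per-job re-import overhead discussed above stays bounded.
n_jobs = str(min(8, 2 * multiprocessing.cpu_count()))
shell_call(['pylint', '--rcfile', rcpath, '--jobs', n_jobs] + files_to_check)
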
Example #4
  def temp_copy_extracted_submission(self):
    """Creates a temporary copy of extracted submission.

    When executed, a submission is allowed to modify its own directory. So,
    to ensure that a submission does not pass any data between runs, a new
    copy of the submission is made before each run. After a run, the temporary
    copy of the submission is deleted.

    Returns:
      directory where temporary copy is located
    """
    tmp_copy_dir = os.path.join(self.submission_dir, 'tmp_copy')
    shell_call(['cp', '-R', self.extracted_submission_dir, tmp_copy_dir])
    return tmp_copy_dir
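
A hedged sketch of the intended copy-run-delete cycle, assuming submission is an already-downloaded submission object; run_submission is a hypothetical placeholder for the actual execution step, and the cleanup call is illustrative:

import shutil

tmp_dir = submission.temp_copy_extracted_submission()
try:
  run_submission(tmp_dir)  # hypothetical: execute the submission in the copy
finally:
  # Delete the temporary copy so no state survives into the next run.
  shutil.rmtree(tmp_dir, ignore_errors=True)
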
Example #5
def make_directory_writable(dirname):
  """Makes directory readable and writable by everybody.

  If you run something inside a Docker container and it writes files, then
  these files will be written as the root user with restricted permissions.
  So to be able to read/modify these files outside of Docker you have to change
  their permissions to be world readable and writable.

  Args:
    dirname: name of the directory

  Returns:
    True if operation was successful
  """
  shell_call(['docker', 'run', '-v',
              '{0}:/output_dir'.format(dirname),
              'busybox:1.27.2',
              'chmod', '-R', 'a+rwx', '/output_dir'])
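
A hedged non-Docker equivalent, assuming sudo is available on the host (the busybox container above avoids needing sudo on the worker machine):

import subprocess


def make_directory_writable_sudo(dirname):
  """Recursively makes dirname world readable and writable via sudo chmod."""
  subprocess.check_call(['sudo', 'chmod', '-R', 'a+rwx', dirname])
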
Example #6
  def run_defenses(self):
    """Method which evaluates all defense work.

    In a loop, this method queries the datastore for incomplete defense work,
    picks one piece of work and runs it.
    """
    logging.info('******** Start evaluation of defenses ********')
    prev_submission_id = None
    need_reload_work = True
    while True:
      # wait until work is available
      if need_reload_work:
        if self.num_defense_shards:
          shard_with_work = self.defense_work.read_undone_from_datastore(
              shard_id=(self.worker_id % self.num_defense_shards),
              num_shards=self.num_defense_shards)
        else:
          shard_with_work = self.defense_work.read_undone_from_datastore()
        logging.info('Loaded %d records of undone work from shard %s',
                     len(self.defense_work), str(shard_with_work))
      if not self.defense_work.work:
        logging.info('Work is not populated, waiting...')
        time.sleep(SLEEP_TIME)
        continue
      if self.defense_work.is_all_work_competed():
        logging.info('All defense work completed.')
        break
      # download all defense data and dataset
      self.fetch_defense_data()
      need_reload_work = False
      # pick piece of work
      work_id = self.defense_work.try_pick_piece_of_work(
          self.worker_id, submission_id=prev_submission_id)
      if not work_id:
        need_reload_work = True
        logging.info('Failed to pick work, waiting...')
        time.sleep(SLEEP_TIME_SHORT)
        continue
      logging.info('Selected work_id: %s', work_id)
      # execute work
      try:
        elapsed_time_sec, prev_submission_id, batch_result = (
            self.run_defense_work(work_id))
        logging.info('Work %s is done', work_id)
        # indicate that work is completed
        is_work_update = self.defense_work.update_work_as_completed(
            self.worker_id, work_id,
            other_values={'elapsed_time': elapsed_time_sec,
                          'stat_correct': batch_result[0],
                          'stat_error': batch_result[1],
                          'stat_target_class': batch_result[2],
                          'stat_num_images': batch_result[3]})
      except WorkerError as e:
        logging.info('Failed to run work:\n%s', str(e))
        if str(e).startswith('Docker returned non-zero retval'):
          logging.info('Running nvidia-docker to ensure that GPU works')
          shell_call(['nvidia-docker', 'run', '--rm', 'nvidia/cuda',
                      'nvidia-smi'])
        is_work_update = self.defense_work.update_work_as_completed(
            self.worker_id, work_id, error=str(e))
      if not is_work_update:
        logging.warning('Can\'t update work "%s" as completed by worker %d',
                        work_id, self.worker_id)
        need_reload_work = True
    logging.info('******** Finished evaluation of defenses ********')
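
Shard selection above is a plain modulo over worker ids, so defense work spreads evenly across shards. For example (illustrative numbers only):

num_defense_shards = 3
# Workers 0..5 are assigned shards 0, 1, 2, 0, 1, 2 respectively.
shard_ids = [worker_id % num_defense_shards for worker_id in range(6)]
print(shard_ids)  # [0, 1, 2, 0, 1, 2]
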
Example #7
  def run_defense_work(self, work_id):
    """Runs one defense work.

    Args:
      work_id: ID of the piece of work to run

    Returns:
      elapsed_time_sec, submission_id, batch_result - elapsed time, id of the
        submission and classification statistics of the batch

    Raises:
      WorkerError: if error occurred during execution.
    """
    class_batch_id = (
        self.defense_work.work[work_id]['output_classification_batch_id'])
    class_batch = self.class_batches.read_batch_from_datastore(class_batch_id)
    adversarial_batch_id = class_batch['adversarial_batch_id']
    submission_id = class_batch['submission_id']
    cloud_result_path = class_batch['result_path']
    logging.info('Defense work piece: '
                 'adversarial_batch_id="%s" submission_id="%s"',
                 adversarial_batch_id, submission_id)
    if submission_id in self.blacklisted_submissions:
      raise WorkerError('Blacklisted submission')
    # get defense
    defense = DefenseSubmission(submission_id, self.submissions,
                                self.storage_bucket)
    defense.download()
    # prepare input - copy adversarial batch locally
    input_dir = os.path.join(LOCAL_INPUT_DIR, adversarial_batch_id)
    if os.path.exists(input_dir):
      sudo_remove_dirtree(input_dir)
    os.makedirs(input_dir)
    try:
      shell_call([
          'gsutil', '-m', 'cp',
          # typical location of adv batch:
          # testing-round/adversarial_images/ADVBATCH000/
          os.path.join('gs://', self.storage_bucket, self.round_name,
                       'adversarial_images', adversarial_batch_id, '*'),
          input_dir
      ])
      adv_images_files = os.listdir(input_dir)
      if (len(adv_images_files) == 1) and adv_images_files[0].endswith('.zip'):
        logging.info('Adversarial batch is in zip archive %s',
                     adv_images_files[0])
        shell_call([
            'unzip', os.path.join(input_dir, adv_images_files[0]),
            '-d', input_dir
        ])
        os.remove(os.path.join(input_dir, adv_images_files[0]))
        adv_images_files = os.listdir(input_dir)
      logging.info('%d adversarial images copied', len(adv_images_files))
    except (subprocess.CalledProcessError, IOError) as e:
      raise WorkerError('Can\'t copy adversarial batch locally', e)
    # prepare output directory
    if os.path.exists(LOCAL_OUTPUT_DIR):
      sudo_remove_dirtree(LOCAL_OUTPUT_DIR)
    os.mkdir(LOCAL_OUTPUT_DIR)
    output_filename = os.path.join(LOCAL_OUTPUT_DIR, 'result.csv')
    # run defense
    elapsed_time_sec = defense.run(input_dir, output_filename)
    # evaluate defense result
    batch_result = eval_lib.analyze_one_classification_result(
        storage_client=None,
        file_path=output_filename,
        adv_batch=self.adv_batches.data[adversarial_batch_id],
        dataset_batches=self.dataset_batches,
        dataset_meta=self.dataset_meta)
    # copy result of the defense into storage
    try:
      shell_call([
          'gsutil', 'cp', output_filename,
          os.path.join('gs://', self.storage_bucket, cloud_result_path)
      ])
    except subprocess.CalledProcessError as e:
      raise WorkerError('Can\'t copy result to Cloud Storage', e)
    return elapsed_time_sec, submission_id, batch_result
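
The unzip step above could also be done in-process. An illustrative alternative (not part of the worker) using the standard-library zipfile module:

import os
import zipfile


def extract_single_zip(input_dir, zip_name):
  """Extracts input_dir/zip_name into input_dir and removes the archive."""
  zip_path = os.path.join(input_dir, zip_name)
  with zipfile.ZipFile(zip_path) as zf:
    zf.extractall(input_dir)
  os.remove(zip_path)
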
Example #8
  def run_attack_work(self, work_id):
    """Runs one attack work.

    Args:
      work_id: ID of the piece of work to run

    Returns:
      elapsed_time_sec, submission_id - elapsed time and id of the submission

    Raises:
      WorkerError: if error occurred during execution.
    """
    adv_batch_id = (
        self.attack_work.work[work_id]['output_adversarial_batch_id'])
    adv_batch = self.adv_batches[adv_batch_id]
    dataset_batch_id = adv_batch['dataset_batch_id']
    submission_id = adv_batch['submission_id']
    epsilon = self.dataset_batches[dataset_batch_id]['epsilon']
    logging.info('Attack work piece: '
                 'dataset_batch_id="%s" submission_id="%s" '
                 'epsilon=%d', dataset_batch_id, submission_id, epsilon)
    if submission_id in self.blacklisted_submissions:
      raise WorkerError('Blacklisted submission')
    # get attack
    attack = AttackSubmission(submission_id, self.submissions,
                              self.storage_bucket)
    attack.download()
    # prepare input
    input_dir = os.path.join(LOCAL_DATASET_DIR, dataset_batch_id)
    if attack.type == TYPE_TARGETED:
      # prepare file with target classes
      target_class_filename = os.path.join(input_dir, 'target_class.csv')
      self.dataset_meta.save_target_classes_for_batch(target_class_filename,
                                                      self.dataset_batches,
                                                      dataset_batch_id)
    # prepare output directory
    if os.path.exists(LOCAL_OUTPUT_DIR):
      sudo_remove_dirtree(LOCAL_OUTPUT_DIR)
    os.mkdir(LOCAL_OUTPUT_DIR)
    if os.path.exists(LOCAL_PROCESSED_OUTPUT_DIR):
      shutil.rmtree(LOCAL_PROCESSED_OUTPUT_DIR)
    os.mkdir(LOCAL_PROCESSED_OUTPUT_DIR)
    if os.path.exists(LOCAL_ZIPPED_OUTPUT_DIR):
      shutil.rmtree(LOCAL_ZIPPED_OUTPUT_DIR)
    os.mkdir(LOCAL_ZIPPED_OUTPUT_DIR)
    # run attack
    elapsed_time_sec = attack.run(input_dir, LOCAL_OUTPUT_DIR, epsilon)
    if attack.type == TYPE_TARGETED:
      # remove target class file
      os.remove(target_class_filename)
    # enforce epsilon and compute hashes
    image_hashes = eval_lib.enforce_epsilon_and_compute_hash(
        input_dir, LOCAL_OUTPUT_DIR, LOCAL_PROCESSED_OUTPUT_DIR, epsilon)
    if not image_hashes:
      logging.warning('No images saved by the attack.')
      return elapsed_time_sec, submission_id
    # write images back to datastore
    # rename images and add information to adversarial batch
    for clean_image_id, hash_val in iteritems(image_hashes):
      # we will use concatenation of batch_id and image_id
      # as adversarial image id and as a filename of adversarial images
      adv_img_id = adv_batch_id + '_' + clean_image_id
      # rename the image
      os.rename(
          os.path.join(LOCAL_PROCESSED_OUTPUT_DIR, clean_image_id + '.png'),
          os.path.join(LOCAL_PROCESSED_OUTPUT_DIR, adv_img_id + '.png'))
      # populate values which will be written to datastore
      image_path = '{0}/adversarial_images/{1}/{1}.zip/{2}.png'.format(
          self.round_name, adv_batch_id, adv_img_id)
      adv_batch['images'][adv_img_id] = {
          'clean_image_id': unicode(clean_image_id),
          'image_path': unicode(image_path),
          'image_hash': unicode(hash_val),
      }
    # archive all images and copy to storage
    zipped_images_filename = os.path.join(LOCAL_ZIPPED_OUTPUT_DIR,
                                          adv_batch_id + '.zip')
    try:
      logging.debug('Compressing adversarial images to %s',
                    zipped_images_filename)
      shell_call([
          'zip', '-j', '-r', zipped_images_filename,
          LOCAL_PROCESSED_OUTPUT_DIR])
    except subprocess.CalledProcessError as e:
      raise WorkerError('Can\'t make archive from adversarial images', e)
    # upload archive to storage
    dst_filename = '{0}/adversarial_images/{1}/{1}.zip'.format(
        self.round_name, adv_batch_id)
    logging.debug(
        'Copying archive with adversarial images to %s', dst_filename)
    self.storage_client.new_blob(dst_filename).upload_from_filename(
        zipped_images_filename)
    # writing adv batch to datastore
    logging.debug('Writing adversarial batch to datastore')
    self.adv_batches.write_single_batch_images_to_datastore(adv_batch_id)
    return elapsed_time_sec, submission_id
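
As with the unzip step in Example #7, the 'zip -j -r' call could be replaced by an in-process equivalent. A sketch (illustrative only) that stores files without directory prefixes, mirroring the -j flag:

import os
import zipfile


def zip_flat(src_dir, dst_zip):
  """Zips every file in src_dir into dst_zip, keeping only basenames."""
  with zipfile.ZipFile(dst_zip, 'w', zipfile.ZIP_DEFLATED) as zf:
    for name in sorted(os.listdir(src_dir)):
      zf.write(os.path.join(src_dir, name), arcname=name)
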
Example #9
  def download(self):
    """Method which downloads submission to local directory."""
    # Structure of the download directory:
    # submission_dir=LOCAL_SUBMISSIONS_DIR/submission_id
    # submission_dir/s.ext   <-- archived submission
    # submission_dir/extracted      <-- extracted submission

    # Check whether submission is already there
    if self.extracted_submission_dir:
      return
    self.submission_dir = os.path.join(LOCAL_SUBMISSIONS_DIR,
                                       self.submission_id)
    if (os.path.isdir(self.submission_dir)
        and os.path.isdir(os.path.join(self.submission_dir, 'extracted'))):
      # submission already there, just re-read metadata
      self.extracted_submission_dir = os.path.join(self.submission_dir,
                                                   'extracted')
      with open(os.path.join(self.extracted_submission_dir, 'metadata.json'),
                'r') as f:
        meta_json = json.load(f)
      self.container_name = str(meta_json[METADATA_CONTAINER])
      self.entry_point = str(meta_json[METADATA_ENTRY_POINT])
      return
    # figure out submission location in the Cloud and determine extractor
    submission_cloud_path = os.path.join('gs://', self.storage_bucket,
                                         self.submission.path)
    extract_command_tmpl = None
    extension = None
    for k, v in iteritems(EXTRACT_COMMAND):
      if submission_cloud_path.endswith(k):
        extension = k
        extract_command_tmpl = v
        break
    if not extract_command_tmpl:
      raise WorkerError('Unsupported submission extension')
    # download archive
    try:
      os.makedirs(self.submission_dir)
      tmp_extract_dir = os.path.join(self.submission_dir, 'tmp')
      os.makedirs(tmp_extract_dir)
      download_path = os.path.join(self.submission_dir, 's' + extension)
      try:
        logging.info('Downloading submission from %s to %s',
                     submission_cloud_path, download_path)
        shell_call(['gsutil', 'cp', submission_cloud_path, download_path])
      except subprocess.CalledProcessError as e:
        raise WorkerError('Can\'t copy submission locally', e)
      # extract archive
      try:
        shell_call(extract_command_tmpl,
                   src=download_path, dst=tmp_extract_dir)
      except subprocess.CalledProcessError as e:
        # proceed even if extraction returned a non-zero error code;
        # sometimes it's just a warning
        logging.warning('Submission extraction returned non-zero error code. '
                        'It may be just a warning, continuing execution. '
                        'Error: %s', e)
      try:
        make_directory_writable(tmp_extract_dir)
      except subprocess.CalledProcessError as e:
        raise WorkerError('Can\'t make submission directory writable', e)
      # determine root of the submission
      tmp_root_dir = tmp_extract_dir
      root_dir_content = [d for d in os.listdir(tmp_root_dir)
                          if d != '__MACOSX']
      if (len(root_dir_content) == 1
          and os.path.isdir(os.path.join(tmp_root_dir, root_dir_content[0]))):
        tmp_root_dir = os.path.join(tmp_root_dir, root_dir_content[0])
      # move files to extract subdirectory
      self.extracted_submission_dir = os.path.join(self.submission_dir,
                                                   'extracted')
      try:
        shell_call(['mv', tmp_root_dir, self.extracted_submission_dir])
      except subprocess.CalledProcessError as e:
        raise WorkerError('Can\'t move submission files', e)
      # read metadata file
      try:
        with open(os.path.join(self.extracted_submission_dir, 'metadata.json'),
                  'r') as f:
          meta_json = json.load(f)
      except IOError as e:
        raise WorkerError(
            'Can\'t read metadata.json for submission "{0}"'.format(
                self.submission_id),
            e)
      try:
        self.container_name = str(meta_json[METADATA_CONTAINER])
        self.entry_point = str(meta_json[METADATA_ENTRY_POINT])
        type_from_meta = METADATA_JSON_TYPE_TO_TYPE[meta_json[METADATA_TYPE]]
      except KeyError as e:
        raise WorkerError('Invalid metadata.json file', e)
      if type_from_meta != self.type:
        raise WorkerError('Inconsistent submission type in metadata: '
                          + type_from_meta + ' vs ' + self.type)
    except WorkerError as e:
      self.extracted_submission_dir = None
      sudo_remove_dirtree(self.submission_dir)
      raise
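
EXTRACT_COMMAND is referenced above but not shown on this page. Presumably it maps archive extensions to shell_call templates with ${src}/${dst} placeholders; the table below is an assumption for illustration, not the actual definition:

# Assumed mapping from archive extension to extraction command template;
# '${src}' and '${dst}' would be substituted by shell_call (see the sketch
# after Example #1).
EXTRACT_COMMAND = {
    '.zip': ['unzip', '${src}', '-d', '${dst}'],
    '.tar': ['tar', 'xf', '${src}', '-C', '${dst}'],
    '.tar.gz': ['tar', 'xzf', '${src}', '-C', '${dst}'],
}
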
Example #10
def kill_docker_container(container_name):
  """Kills given docker container."""
  docker_id = get_id_of_running_docker(container_name)
  shell_call([DOCKER_BINARY, 'stop', docker_id])
Example #11
def get_id_of_running_docker(container_name):
  """Returns ID of running docker container."""
  return shell_call([DOCKER_BINARY,
                     'ps',
                     '-q',
                     '--filter=name={}'.format(container_name)]).strip()
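
DOCKER_BINARY is not defined on this page; presumably it names the docker CLI. Illustrative usage of the two helpers above, with assumed values:

DOCKER_BINARY = 'docker'  # assumption; some workers may use 'nvidia-docker'

# Stop the submission container by name if it is still running.
if get_id_of_running_docker('submission_container'):
  kill_docker_container('submission_container')
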
Example #12
"""
Run this script to run autopep8 on everything in the library
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from cleverhans_copy.devtools.list_files import list_files
from cleverhans_copy.utils import shell_call

for f in list_files(".py"):
    # Reformat each Python file in place with 2-space indentation.
    command = ["autopep8", "-i", "--indent-size", "2", f]
    shell_call(command)
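
list_files comes from cleverhans_copy.devtools.list_files and is not shown on this page. A minimal stand-in with the same call signature, assuming it simply walks the package directory and collects paths ending in the given suffix:

import os

import cleverhans_copy


def list_files(suffix=''):
  """Returns paths of files under the cleverhans_copy package ending in suffix."""
  package_dir = cleverhans_copy.__path__[0]
  matches = []
  for dirpath, _, filenames in os.walk(package_dir):
    for filename in filenames:
      if filename.endswith(suffix):
        matches.append(os.path.join(dirpath, filename))
  return matches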