def make_directory_writable(dirname):
  """Makes directory readable and writable by everybody.

  If you run something inside a Docker container and it writes files, then
  these files will be written as the root user with restricted permissions.
  So to be able to read/modify these files outside of Docker you have to
  change permissions to be world readable and writable.

  Args:
    dirname: name of the directory

  Returns:
    True if operation was successful
  """
  return shell_call([
      "docker", "run", "-v", "{0}:/output_dir".format(dirname),
      "busybox:1.27.2", "chmod", "-R", "a+rwx", "/output_dir",
  ])
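# A minimal usage sketch of the helper above. The directory path is
# hypothetical, and it is assumed that shell_call returns a truthy value
# when the underlying command succeeds, as the docstring implies.
#
# output_dir = '/tmp/attack_output'  # hypothetical dir written by a container
# if not make_directory_writable(output_dir):
#   logging.warning('Failed to make %s world writable', output_dir)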
def main(args):
  """Main function which runs worker."""
  title = '## Starting evaluation of round {0} ##'.format(args.round_name)
  logging.info('\n'
               + '#' * len(title) + '\n'
               + '#' * len(title) + '\n'
               + '##' + ' ' * (len(title) - 2) + '##' + '\n'
               + title + '\n'
               + '#' * len(title) + '\n'
               + '#' * len(title) + '\n'
               + '##' + ' ' * (len(title) - 2) + '##' + '\n')
  if args.blacklisted_submissions:
    logging.warning('BLACKLISTED SUBMISSIONS: %s',
                    args.blacklisted_submissions)
  random.seed()
  logging.info('Running nvidia-docker to ensure that GPU works')
  shell_call(['docker', 'run', '--runtime=nvidia', '--rm', 'nvidia/cuda',
              'nvidia-smi'])
  eval_worker = EvaluationWorker(
      worker_id=args.worker_id,
      storage_client=eval_lib.CompetitionStorageClient(
          args.project_id, args.storage_bucket),
      datastore_client=eval_lib.CompetitionDatastoreClient(
          args.project_id, args.round_name),
      storage_bucket=args.storage_bucket,
      round_name=args.round_name,
      dataset_name=args.dataset_name,
      blacklisted_submissions=args.blacklisted_submissions,
      num_defense_shards=args.num_defense_shards)
  eval_worker.run_work()
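# Illustrative sketch of the command-line wiring that would produce the
# `args` namespace consumed by main(). The flag names are inferred from the
# attributes read above (args.worker_id, args.round_name, ...); the real
# script's parser may define them differently.
#
# import argparse
#
# def parse_args():
#   parser = argparse.ArgumentParser(
#       description='Competition evaluation worker')
#   parser.add_argument('--worker_id', type=int, required=True)
#   parser.add_argument('--project_id', required=True)
#   parser.add_argument('--storage_bucket', required=True)
#   parser.add_argument('--round_name', required=True)
#   parser.add_argument('--dataset_name', required=True)
#   parser.add_argument('--blacklisted_submissions', default='')
#   parser.add_argument('--num_defense_shards', type=int, default=None)
#   return parser.parse_args()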
def read_dataset_metadata(self):
  """Reads `dataset_meta` field from the bucket."""
  if self.dataset_meta:
    return
  shell_call(['gsutil', 'cp',
              'gs://' + self.storage_client.bucket_name + '/'
              + 'dataset/' + self.dataset_name + '_dataset.csv',
              LOCAL_DATASET_METADATA_FILE])
  with open(LOCAL_DATASET_METADATA_FILE, 'r') as f:
    self.dataset_meta = eval_lib.DatasetMetadata(f)
def test_format_pep8():
  """
  Test if pep8 is respected.
  """
  files_to_check = []
  module_dir = cleverhans.__path__[0]
  for path in all_py_files:
    rel_path = os.path.relpath(path, module_dir)
    if rel_path in whitelist_pep8:
      continue
    else:
      files_to_check.append(path)
  repo_dir = os.path.join(module_dir, os.pardir)
  rcpath = os.path.join(repo_dir, '.pylintrc')
  assert os.path.exists(rcpath)
  # We must run pylint via the command line and subprocess because of
  # problems with the pylint module.
  # The documentation claims you can run it as a python module, but
  # the documentation is wrong: https://github.com/PyCQA/pylint/issues/1870
  # If you run the version described in the linked issue, pylint
  # calls sys.exit once it is done, so it kills the test.
  # Running all files in one pylint command is important for 2 reasons:
  # 1) Correctness: pylint can detect issues that require access to multiple
  #    files, such as cyclic imports.
  # 2) Speed: pylint imports modules for deep analysis, so if you run
  #    multiple subprocesses each needs to re-import tensorflow.
  #    On Ian's laptop, pylint takes about 10s per file to run on the repo,
  #    and there are about 90 files as of the writing of this comment.
  #    Running pylint on all files simultaneously takes about 70s, so it
  #    is a little better than a 10X speedup.
  # Running multiple jobs in parallel helps but far less than linearly.
  # On Ian's 4-core laptop, running 4 jobs drops the runtime from 70s
  # to 45s.
  # Some of the work is I/O, so it actually makes some sense to run
  # more jobs than cores. On Ian's 4-core laptop, running 8 jobs drops
  # the runtime to 40s.
  # There's a further complication though: I think each job needs to
  # redo imports, so the total amount of work to do increases with
  # the number of jobs. On Ian's laptop, using 64 jobs causes the
  # runtime to increase to 220s. There is not an obvious simple
  # formula like "use one job per CPU core" or "use way more jobs
  # than cores to saturate I/O". For now I'm hoping that 8 will be
  # a reasonable default: it gets good performance on my laptop,
  # and on machines with fewer than 4 cores there should still be
  # a benefit to not being blocked on I/O.
  try:
    shell_call(['pylint', '--rcfile', rcpath, '--jobs', '8']
               + files_to_check)
  except subprocess.CalledProcessError as e:
    raise ValueError(e.output.decode("utf-8"))
def temp_copy_extracted_submission(self):
  """Creates a temporary copy of the extracted submission.

  When executed, a submission is allowed to modify its own directory. So to
  ensure that the submission does not pass any data between runs, a new copy
  of the submission is made before each run. After a run the temporary copy
  of the submission is deleted.

  Returns:
    directory where the temporary copy is located
  """
  tmp_copy_dir = os.path.join(self.submission_dir, 'tmp_copy')
  shell_call(['cp', '-R', self.extracted_submission_dir, tmp_copy_dir])
  return tmp_copy_dir
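# A minimal caller sketch of the copy-run-delete cycle described in the
# docstring above. run_in_container() is a hypothetical stand-in for the
# caller's execution logic; sudo_remove_dirtree is the cleanup helper used
# elsewhere in this worker.
#
# tmp_dir = submission.temp_copy_extracted_submission()
# try:
#   run_in_container(tmp_dir)      # hypothetical: execute the submission
# finally:
#   sudo_remove_dirtree(tmp_dir)   # remove the temporary copy after the run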
def show(ndarray, min_val=None, max_val=None):
  """
  Display an image.
  :param ndarray: The image as an ndarray
  :param min_val: The minimum pixel value in the image format
  :param max_val: The maximum pixel value in the image format
    If min_val and max_val are not specified, attempts to infer
    whether the image is in any of the common ranges:
      [0, 1], [-1, 1], [0, 255]
    This can be ambiguous, so it is better to specify if known.
  """
  # Create a temporary file with the suffix '.png'.
  fd, path = mkstemp(suffix='.png')
  os.close(fd)
  save(path, ndarray, min_val, max_val)
  shell_call(VIEWER_COMMAND + [path])
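# The range inference mentioned in the docstring happens in the companion
# save() helper. The sketch below only illustrates the kind of guess that
# can be made from pixel statistics; it is a hypothetical helper, not the
# library's implementation, and an explicit range should be preferred.
#
# import numpy as np
#
# def _infer_pixel_range(image):
#   """Illustrative: guess (min_val, max_val) from the pixel values."""
#   lo, hi = image.min(), image.max()
#   if lo < 0:
#     return -1., 1.    # values below zero suggest [-1, 1]
#   if hi > 1:
#     return 0., 255.   # values above one suggest [0, 255]
#   return 0., 1.       # otherwise assume [0, 1]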
""" Run this script to run autopep8 on everything in the library """ from __future__ import absolute_import from __future__ import division from __future__ import print_function from cleverhans.devtools.list_files import list_files from cleverhans.utils import shell_call for f in list_files(".py"): command = ["autopep8", "-i", "--indent-size", "2", f] shell_call(command)
def run_defenses(self):
  """Method which evaluates all defense work.

  In a loop this method queries not completed defense work,
  picks one defense work and runs it.
  """
  logging.info('******** Start evaluation of defenses ********')
  prev_submission_id = None
  need_reload_work = True
  while True:
    # wait until work is available
    if need_reload_work:
      if self.num_defense_shards:
        shard_with_work = self.defense_work.read_undone_from_datastore(
            shard_id=(self.worker_id % self.num_defense_shards),
            num_shards=self.num_defense_shards)
      else:
        shard_with_work = self.defense_work.read_undone_from_datastore()
      logging.info('Loaded %d records of undone work from shard %s',
                   len(self.defense_work), str(shard_with_work))
    if not self.defense_work.work:
      logging.info('Work is not populated, waiting...')
      time.sleep(SLEEP_TIME)
      continue
    if self.defense_work.is_all_work_competed():
      logging.info('All defense work completed.')
      break
    # download all defense data and dataset
    self.fetch_defense_data()
    need_reload_work = False
    # pick piece of work
    work_id = self.defense_work.try_pick_piece_of_work(
        self.worker_id, submission_id=prev_submission_id)
    if not work_id:
      need_reload_work = True
      logging.info('Failed to pick work, waiting...')
      time.sleep(SLEEP_TIME_SHORT)
      continue
    logging.info('Selected work_id: %s', work_id)
    # execute work
    try:
      elapsed_time_sec, prev_submission_id, batch_result = (
          self.run_defense_work(work_id))
      logging.info('Work %s is done', work_id)
      # indicate that work is completed
      is_work_update = self.defense_work.update_work_as_completed(
          self.worker_id, work_id,
          other_values={'elapsed_time': elapsed_time_sec,
                        'stat_correct': batch_result[0],
                        'stat_error': batch_result[1],
                        'stat_target_class': batch_result[2],
                        'stat_num_images': batch_result[3]})
    except WorkerError as e:
      logging.info('Failed to run work:\n%s', str(e))
      if str(e).startswith('Docker returned non-zero retval'):
        logging.info('Running nvidia-docker to ensure that GPU works')
        shell_call(['nvidia-docker', 'run', '--rm', 'nvidia/cuda',
                    'nvidia-smi'])
      is_work_update = self.defense_work.update_work_as_completed(
          self.worker_id, work_id, error=str(e))
    if not is_work_update:
      logging.warning('Can\'t update work "%s" as completed by worker %d',
                      work_id, self.worker_id)
      need_reload_work = True
  logging.info('******** Finished evaluation of defenses ********')
def run_defense_work(self, work_id):
  """Runs one defense work.

  Args:
    work_id: ID of the piece of work to run

  Returns:
    elapsed_time_sec, submission_id, batch_result - elapsed time,
      id of the submission and statistics of the classification batch

  Raises:
    WorkerError: if an error occurred during execution.
  """
  class_batch_id = (
      self.defense_work.work[work_id]['output_classification_batch_id'])
  class_batch = self.class_batches.read_batch_from_datastore(class_batch_id)
  adversarial_batch_id = class_batch['adversarial_batch_id']
  submission_id = class_batch['submission_id']
  cloud_result_path = class_batch['result_path']
  logging.info('Defense work piece: '
               'adversarial_batch_id="%s" submission_id="%s"',
               adversarial_batch_id, submission_id)
  if submission_id in self.blacklisted_submissions:
    raise WorkerError('Blacklisted submission')
  # get defense
  defense = DefenseSubmission(submission_id, self.submissions,
                              self.storage_bucket)
  defense.download()
  # prepare input - copy adversarial batch locally
  input_dir = os.path.join(LOCAL_INPUT_DIR, adversarial_batch_id)
  if os.path.exists(input_dir):
    sudo_remove_dirtree(input_dir)
  os.makedirs(input_dir)
  try:
    shell_call([
        'gsutil', '-m', 'cp',
        # typical location of adv batch:
        # testing-round/adversarial_images/ADVBATCH000/
        os.path.join('gs://', self.storage_bucket, self.round_name,
                     'adversarial_images', adversarial_batch_id, '*'),
        input_dir
    ])
    adv_images_files = os.listdir(input_dir)
    if (len(adv_images_files) == 1) and adv_images_files[0].endswith('.zip'):
      logging.info('Adversarial batch is in zip archive %s',
                   adv_images_files[0])
      shell_call([
          'unzip', os.path.join(input_dir, adv_images_files[0]),
          '-d', input_dir
      ])
      os.remove(os.path.join(input_dir, adv_images_files[0]))
      adv_images_files = os.listdir(input_dir)
    logging.info('%d adversarial images copied', len(adv_images_files))
  except (subprocess.CalledProcessError, IOError) as e:
    raise WorkerError('Can\'t copy adversarial batch locally', e)
  # prepare output directory
  if os.path.exists(LOCAL_OUTPUT_DIR):
    sudo_remove_dirtree(LOCAL_OUTPUT_DIR)
  os.mkdir(LOCAL_OUTPUT_DIR)
  output_filename = os.path.join(LOCAL_OUTPUT_DIR, 'result.csv')
  # run defense
  elapsed_time_sec = defense.run(input_dir, output_filename)
  # evaluate defense result
  batch_result = eval_lib.analyze_one_classification_result(
      storage_client=None,
      file_path=output_filename,
      adv_batch=self.adv_batches.data[adversarial_batch_id],
      dataset_batches=self.dataset_batches,
      dataset_meta=self.dataset_meta)
  # copy result of the defense into storage
  try:
    shell_call([
        'gsutil', 'cp', output_filename,
        os.path.join('gs://', self.storage_bucket, cloud_result_path)
    ])
  except subprocess.CalledProcessError as e:
    raise WorkerError('Can\'t copy result to Cloud Storage', e)
  return elapsed_time_sec, submission_id, batch_result
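# The defense is expected to write result.csv as a mapping from image file
# to predicted label, which analyze_one_classification_result() then scores.
# The parser below is only an illustrative sketch; the assumed layout is
# "<image_filename>,<predicted_label>" per line with no header.
#
# import csv
#
# def read_defense_output(path):
#   """Illustrative: parse an assumed filename,label CSV."""
#   predictions = {}
#   with open(path, 'r') as f:
#     for row in csv.reader(f):
#       if len(row) < 2:
#         continue  # skip malformed lines
#       predictions[row[0]] = int(row[1])
#   return predictions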
def run_attack_work(self, work_id):
  """Runs one attack work.

  Args:
    work_id: ID of the piece of work to run

  Returns:
    elapsed_time_sec, submission_id - elapsed time and id of the submission

  Raises:
    WorkerError: if an error occurred during execution.
  """
  adv_batch_id = (
      self.attack_work.work[work_id]['output_adversarial_batch_id'])
  adv_batch = self.adv_batches[adv_batch_id]
  dataset_batch_id = adv_batch['dataset_batch_id']
  submission_id = adv_batch['submission_id']
  epsilon = self.dataset_batches[dataset_batch_id]['epsilon']
  logging.info('Attack work piece: '
               'dataset_batch_id="%s" submission_id="%s" '
               'epsilon=%d', dataset_batch_id, submission_id, epsilon)
  if submission_id in self.blacklisted_submissions:
    raise WorkerError('Blacklisted submission')
  # get attack
  attack = AttackSubmission(submission_id, self.submissions,
                            self.storage_bucket)
  attack.download()
  # prepare input
  input_dir = os.path.join(LOCAL_DATASET_DIR, dataset_batch_id)
  if attack.type == TYPE_TARGETED:
    # prepare file with target classes
    target_class_filename = os.path.join(input_dir, 'target_class.csv')
    self.dataset_meta.save_target_classes_for_batch(target_class_filename,
                                                    self.dataset_batches,
                                                    dataset_batch_id)
  # prepare output directory
  if os.path.exists(LOCAL_OUTPUT_DIR):
    sudo_remove_dirtree(LOCAL_OUTPUT_DIR)
  os.mkdir(LOCAL_OUTPUT_DIR)
  if os.path.exists(LOCAL_PROCESSED_OUTPUT_DIR):
    shutil.rmtree(LOCAL_PROCESSED_OUTPUT_DIR)
  os.mkdir(LOCAL_PROCESSED_OUTPUT_DIR)
  if os.path.exists(LOCAL_ZIPPED_OUTPUT_DIR):
    shutil.rmtree(LOCAL_ZIPPED_OUTPUT_DIR)
  os.mkdir(LOCAL_ZIPPED_OUTPUT_DIR)
  # run attack
  elapsed_time_sec = attack.run(input_dir, LOCAL_OUTPUT_DIR, epsilon)
  if attack.type == TYPE_TARGETED:
    # remove target class file
    os.remove(target_class_filename)
  # enforce epsilon and compute hashes
  image_hashes = eval_lib.enforce_epsilon_and_compute_hash(
      input_dir, LOCAL_OUTPUT_DIR, LOCAL_PROCESSED_OUTPUT_DIR, epsilon)
  if not image_hashes:
    logging.warning('No images saved by the attack.')
    return elapsed_time_sec, submission_id
  # write images back to datastore:
  # rename images and add information to adversarial batch
  for clean_image_id, hash_val in iteritems(image_hashes):
    # we will use concatenation of batch_id and image_id
    # as adversarial image id and as a filename of adversarial images
    adv_img_id = adv_batch_id + '_' + clean_image_id
    # rename the image
    os.rename(
        os.path.join(LOCAL_PROCESSED_OUTPUT_DIR, clean_image_id + '.png'),
        os.path.join(LOCAL_PROCESSED_OUTPUT_DIR, adv_img_id + '.png'))
    # populate values which will be written to datastore
    image_path = '{0}/adversarial_images/{1}/{1}.zip/{2}.png'.format(
        self.round_name, adv_batch_id, adv_img_id)
    # u'' + foo is a python 2/3 compatible way of casting foo to unicode
    adv_batch['images'][adv_img_id] = {
        'clean_image_id': u'' + str(clean_image_id),
        'image_path': u'' + str(image_path),
        'image_hash': u'' + str(hash_val),
    }
  # archive all images and copy to storage
  zipped_images_filename = os.path.join(LOCAL_ZIPPED_OUTPUT_DIR,
                                        adv_batch_id + '.zip')
  try:
    logging.debug('Compressing adversarial images to %s',
                  zipped_images_filename)
    shell_call([
        'zip', '-j', '-r', zipped_images_filename,
        LOCAL_PROCESSED_OUTPUT_DIR])
  except subprocess.CalledProcessError as e:
    raise WorkerError('Can\'t make archive from adversarial images', e)
  # upload archive to storage
  dst_filename = '{0}/adversarial_images/{1}/{1}.zip'.format(
      self.round_name, adv_batch_id)
  logging.debug(
      'Copying archive with adversarial images to %s', dst_filename)
  self.storage_client.new_blob(dst_filename).upload_from_filename(
      zipped_images_filename)
  # writing adv batch to datastore
  logging.debug('Writing adversarial batch to datastore')
  self.adv_batches.write_single_batch_images_to_datastore(adv_batch_id)
  return elapsed_time_sec, submission_id
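# The epsilon enforcement step above is handled by
# eval_lib.enforce_epsilon_and_compute_hash. The sketch below only
# illustrates the underlying idea, assuming uint8 images and an
# L-infinity ball of radius epsilon around each clean image; the helper
# names are illustrative, not the library's actual implementation.
#
# import hashlib
# import numpy as np
#
# def _clip_to_epsilon_ball(clean_image, adv_image, epsilon):
#   """Illustrative: project adv_image into the L_inf ball around clean_image."""
#   clean = clean_image.astype(np.int64)
#   adv = adv_image.astype(np.int64)
#   clipped = np.clip(adv, clean - epsilon, clean + epsilon)
#   return np.clip(clipped, 0, 255).astype(np.uint8)
#
# def _image_hash(image):
#   """Illustrative: stable hash of the pixel content."""
#   return hashlib.sha1(image.tobytes()).hexdigest()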
def download(self):
  """Method which downloads submission to local directory."""
  # Structure of the download directory:
  # submission_dir=LOCAL_SUBMISSIONS_DIR/submission_id
  # submission_dir/s.ext <-- archived submission
  # submission_dir/extracted <-- extracted submission

  # Check whether submission is already there
  if self.extracted_submission_dir:
    return
  self.submission_dir = os.path.join(LOCAL_SUBMISSIONS_DIR,
                                     self.submission_id)
  if (os.path.isdir(self.submission_dir)
      and os.path.isdir(os.path.join(self.submission_dir, 'extracted'))):
    # submission already there, just re-read metadata
    self.extracted_submission_dir = os.path.join(self.submission_dir,
                                                 'extracted')
    with open(os.path.join(self.extracted_submission_dir,
                           'metadata.json'), 'r') as f:
      meta_json = json.load(f)
    self.container_name = str(meta_json[METADATA_CONTAINER])
    self.entry_point = str(meta_json[METADATA_ENTRY_POINT])
    return
  # figure out submission location in the Cloud and determine extractor
  submission_cloud_path = os.path.join('gs://', self.storage_bucket,
                                       self.submission.path)
  extract_command_tmpl = None
  extension = None
  for k, v in iteritems(EXTRACT_COMMAND):
    if submission_cloud_path.endswith(k):
      extension = k
      extract_command_tmpl = v
      break
  if not extract_command_tmpl:
    raise WorkerError('Unsupported submission extension')
  # download archive
  try:
    os.makedirs(self.submission_dir)
    tmp_extract_dir = os.path.join(self.submission_dir, 'tmp')
    os.makedirs(tmp_extract_dir)
    download_path = os.path.join(self.submission_dir, 's' + extension)
    try:
      logging.info('Downloading submission from %s to %s',
                   submission_cloud_path, download_path)
      shell_call(['gsutil', 'cp', submission_cloud_path, download_path])
    except subprocess.CalledProcessError as e:
      raise WorkerError('Can\'t copy submission locally', e)
    # extract archive
    try:
      shell_call(extract_command_tmpl,
                 src=download_path, dst=tmp_extract_dir)
    except subprocess.CalledProcessError as e:
      # proceed even if extraction returned a non-zero error code,
      # sometimes it's just a warning
      logging.warning('Submission extraction returned non-zero error code. '
                      'It may be just a warning, continuing execution. '
                      'Error: %s', e)
    try:
      make_directory_writable(tmp_extract_dir)
    except subprocess.CalledProcessError as e:
      raise WorkerError('Can\'t make submission directory writable', e)
    # determine root of the submission
    tmp_root_dir = tmp_extract_dir
    root_dir_content = [d for d in os.listdir(tmp_root_dir)
                        if d != '__MACOSX']
    if (len(root_dir_content) == 1
        and os.path.isdir(os.path.join(tmp_root_dir, root_dir_content[0]))):
      tmp_root_dir = os.path.join(tmp_root_dir, root_dir_content[0])
    # move files to extract subdirectory
    self.extracted_submission_dir = os.path.join(self.submission_dir,
                                                 'extracted')
    try:
      shell_call(['mv', tmp_root_dir, self.extracted_submission_dir])
    except subprocess.CalledProcessError as e:
      raise WorkerError('Can\'t move submission files', e)
    # read metadata file
    try:
      with open(os.path.join(self.extracted_submission_dir,
                             'metadata.json'), 'r') as f:
        meta_json = json.load(f)
    except IOError as e:
      raise WorkerError(
          'Can\'t read metadata.json for submission "{0}"'.format(
              self.submission_id), e)
    try:
      self.container_name = str(meta_json[METADATA_CONTAINER])
      self.entry_point = str(meta_json[METADATA_ENTRY_POINT])
      type_from_meta = METADATA_JSON_TYPE_TO_TYPE[meta_json[METADATA_TYPE]]
    except KeyError as e:
      raise WorkerError('Invalid metadata.json file', e)
    if type_from_meta != self.type:
      raise WorkerError('Inconsistent submission type in metadata: '
                        + type_from_meta + ' vs ' + self.type)
  except WorkerError as e:
    self.extracted_submission_dir = None
    sudo_remove_dirtree(self.submission_dir)
    raise
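# download() relies on an EXTRACT_COMMAND mapping and a metadata.json file
# inside the archive. The sketch below is only a plausible shape inferred
# from how they are used above (extension-keyed command templates with
# ${src}/${dst} placeholders, and metadata keys for type, container and
# entry point); the real constants are defined elsewhere and may differ.
#
# EXTRACT_COMMAND_SKETCH = {
#     '.zip': ['unzip', '${src}', '-d', '${dst}'],
#     '.tar': ['tar', 'xf', '${src}', '-C', '${dst}'],
#     '.tar.gz': ['tar', 'xzf', '${src}', '-C', '${dst}'],
# }
#
# Example of what a submission's metadata.json might contain:
# {
#   "type": "attack",
#   "container_gpu": "<docker image name>",
#   "entry_point": "run_attack.sh"
# }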
def kill_docker_container(container_name):
  """Kills given docker container."""
  docker_id = get_id_of_running_docker(container_name)
  shell_call([DOCKER_BINARY, 'stop', docker_id])
def get_id_of_running_docker(container_name): """Returns ID of running docker container.""" return shell_call([DOCKER_BINARY, 'ps', '-q', '--filter=name={}'.format(container_name)]).strip()
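# Minimal usage sketch for the two docker helpers above, e.g. from a watchdog
# that stops a submission container that has run too long. The container name
# is hypothetical; it is assumed that DOCKER_BINARY points at the docker CLI
# and that shell_call returns the command's stdout, as the .strip() implies.
#
# container = 'submission_worker0'       # hypothetical container name
# if get_id_of_running_docker(container):
#   kill_docker_container(container)     # stop it if it is still running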