def _create_single_drive(data_dir, device):
  """Formats a single device as ext4 and mounts it as the data drive.

  Args:
    data_dir: String - Mount point for the drive; created with `mkdir -p`.
    device: String - Path of the device to format and mount.
  """
  # Order matters: format the device, make sure the mount point exists, mount
  # it, and only then open up write permission on the mounted directory.
  setup_steps = [
      'mkfs.ext4 -F {}'.format(device),
      'mkdir -p {}'.format(data_dir),
      'mount {} {}'.format(device, data_dir),
      'chmod a+w {}'.format(data_dir),
  ]
  utils.run_commands(setup_steps)
  logging.info('Created and mounted device %s at %s', device, data_dir)
def create_ram_disk(data_dir, disk_size):
  """Mounts a tmpfs RAM disk at data_dir unless something is mounted there.

  Args:
    data_dir: String - Directory at which the RAM disk is mounted; created
      with `mkdir -p` when a new disk is set up.
    disk_size: Size of the RAM disk, passed to mount as `size=<disk_size>m`.
  """
  mount_status, _ = utils.run_command('mountpoint -q {}'.format(data_dir))

  # A zero return code from `mountpoint -q` is treated as "already mounted",
  # in which case the existing mount is left untouched.
  if not mount_status:
    logging.debug('RAM disk or something else is mounted at %s', data_dir)
    return

  make_dir_cmd = 'mkdir -p {}'.format(data_dir)
  mount_cmd = 'mount -t tmpfs -o size={}m tmpfs {}'.format(disk_size, data_dir)
  utils.run_commands([make_dir_cmd, mount_cmd])
  logging.info('Created RAM disk at %s', data_dir)
def _create_drive_raid(data_dir, devices):
  """Builds a RAID 0 array at /dev/md0 from the devices and mounts it.

  Args:
    data_dir: String - Mount point for the array; created with `mkdir -p`.
    devices: List of strings - Paths of the devices that make up the array.
  """
  device_count = len(devices)
  device_list = ' '.join(devices)

  # mdadm has no -y option, so `yes` is piped in to answer its prompts. GCE
  # nvme drives are sometimes in an odd state and believe they already belong
  # to another raid; possibly the kokoro images were left dirty and that is
  # where the stale information comes from.
  create_array_cmd = ('yes | mdadm --create /dev/md0 --level=0 '
                      '--raid-devices={} {}'.format(device_count, device_list))

  setup_steps = [
      create_array_cmd,
      'mkfs.ext4 -F /dev/md0',
      'mkdir -p {}'.format(data_dir),
      'mount /dev/md0 {}'.format(data_dir),
      'chmod a+w {}'.format(data_dir),
  ]
  utils.run_commands(setup_steps)
  logging.info('Created and mounted RAID array at %s', data_dir)
def _load_docker_image(FLAGS, workspace_dir, setup_execution_time):
  """Runs docker load --input <local copy of FLAGS.dockerfile_path>.

  Fetches FLAGS.dockerfile_path to the path returned by
  _temporary_file_name(workspace_dir, 'local_docker'), i.e.
  workspace_dir/<temp-dir>/local_docker.
  Runs docker load --input <path-to-local-docker>, then docker images.
  Deletes workspace_dir/<temp-dir> afterwards, whether the fetch and load
  succeeded or raised.

  Args:
    FLAGS: parser.parse_known_args object.
    workspace_dir: String - The path to use for intermediate artifacts.
    setup_execution_time: Map from string->double containing wall times for
      different operations. This will have insertions describing the docker
      setup time: 'fetch_docker' (download only) and 'load_docker' (measured
      from the start of this function, so it includes the download time).
  """
  load_docker_start_time = time.time()
  local_docker_image_path = _temporary_file_name(workspace_dir, 'local_docker')
  local_docker_image_dir = os.path.dirname(local_docker_image_path)

  try:
    # The download is inside the try block so that a failed or partial fetch
    # does not leave the temporary directory behind.
    utils.download_data([{
        'url': FLAGS.dockerfile_path,
        'local_path': local_docker_image_path,
        'decompress': False
    }])
    setup_execution_time['fetch_docker'] = (
        time.time() - load_docker_start_time)

    docker_load_cmd = 'docker load --input {}'.format(local_docker_image_path)
    utils.run_commands([
        docker_load_cmd,
        'docker images'  # Print loaded image list.
    ])
    setup_execution_time['load_docker'] = time.time() - load_docker_start_time
  finally:
    logging.info('removing parent dir of local docker image copy %s',
                 local_docker_image_path)
    # The directory may be absent if the fetch failed before anything was
    # written; skip removal in that case so the original error is not masked.
    if os.path.isdir(local_docker_image_dir):
      shutil.rmtree(local_docker_image_dir)
workspace_dir, download_only=True) setup_execution_time['download_token'] = time.time() - start_time # Set up the raid array. start_time = time.time() device_utils.create_drive_from_devices(FLAGS.root_data_dir, FLAGS.gce_nvme_raid) setup_execution_time['create_drive'] = time.time() - start_time # Create docker image start_time = time.time() dockerfile_path = FLAGS.dockerfile_path if not os.path.exists(dockerfile_path): # Fall back to the deprecated approach if the user-specified # dockerfile_path does not exist dockerfile_path = os.path.join(project_dir, FLAGS.dockerfile_path) docker_tag = 'perfzero/tensorflow' if FLAGS.tensorflow_pip_spec: cmd = 'docker build --no-cache --pull -t {} --build-arg tensorflow_pip_spec={} - < {}'.format( # pylint: disable=line-too-long docker_tag, FLAGS.tensorflow_pip_spec, dockerfile_path) else: cmd = 'docker build --no-cache --pull -t {} - < {}'.format(docker_tag, dockerfile_path) # pylint: disable=line-too-long utils.run_commands([cmd]) logging.info('Built docker image with tag %s', docker_tag) setup_execution_time['build_docker'] = time.time() - start_time logging.info('Setup time in seconds by operation:\n %s', json.dumps(setup_execution_time, indent=2))
def _create_docker_image(FLAGS, project_dir, workspace_dir,
                         setup_execution_time):
  """Creates a docker image with `docker build`.

  The docker build context is workspace_dir/resources. If
  FLAGS.tensorflow_pip_spec is a gs:// or file:// URL, the package is first
  downloaded into the context and FLAGS.tensorflow_pip_spec is overwritten
  with the downloaded file's basename (note: this mutates FLAGS).

  Args:
    FLAGS: parser.parse_known_args object. Reads tensorflow_pip_spec,
      dockerfile_path, extra_pip_specs, extra_docker_build_args and
      docker_tag.
    project_dir: String - The current project path.
    workspace_dir: String - The path to use for intermediate artifacts.
    setup_execution_time: Map from string->double containing wall times for
      different operations. This will have a 'build_docker' insertion
      describing the docker setup time.
  """
  docker_start_time = time.time()
  docker_context = os.path.join(workspace_dir, 'resources')
  # Necessary in case we don't have a local .whl file.
  utils.create_empty_file(docker_context, 'EMPTY')

  # Download TensorFlow pip package from Google Cloud Storage (or a file://
  # location) and modify the package path accordingly, if applicable.
  pip_spec = FLAGS.tensorflow_pip_spec
  if pip_spec and pip_spec.startswith(('gs://', 'file://')):
    local_pip_filename = os.path.basename(pip_spec)
    local_pip_path = os.path.join(docker_context, local_pip_filename)
    utils.download_data([{
        'url': pip_spec,
        'local_path': local_pip_path
    }])
    # Update path to pip wheel file for the Dockerfile. Note that this path has
    # to be relative to the docker context (absolute path will not work).
    FLAGS.tensorflow_pip_spec = local_pip_filename
    local_tensorflow_pip_spec = local_pip_filename
  else:
    local_tensorflow_pip_spec = 'EMPTY'

  dockerfile_path = FLAGS.dockerfile_path
  if not os.path.exists(dockerfile_path):
    # Fall back to the deprecated approach if the user-specified
    # dockerfile_path does not exist
    dockerfile_path = os.path.join(project_dir, FLAGS.dockerfile_path)

  # Semicolons are stripped from the extra pip specs before they are embedded
  # in the command line. NOTE(review): presumably so the value cannot end the
  # shell command early - confirm.
  extra_pip_specs = (FLAGS.extra_pip_specs or '').replace(';', '')
  docker_base_cmd = 'docker build --no-cache --pull'

  # FLAGS.extra_docker_build_args will be a list of strings (e.g. ['a', 'b=c']).
  # We treat the strings directly as build-args: --build-arg a --build-arg b=c
  # Empty strings are ignored. An unset (None) flag is treated as no extra
  # arguments instead of raising a TypeError.
  extra_docker_build_args = ' '.join([
      '--build-arg %s' % arg
      for arg in (FLAGS.extra_docker_build_args or [])
      if arg
  ])

  # Only set when a pip spec is present; reads the possibly rewritten flag.
  tf_pip = ''
  if FLAGS.tensorflow_pip_spec:
    tf_pip = ' --build-arg tensorflow_pip_spec={}'.format(
        FLAGS.tensorflow_pip_spec)

  # docker_context is always a non-empty path here, so the image is always
  # built from that context with an explicit -f <dockerfile>.
  cmd = ('{docker_base_cmd} -t {docker_tag}{tf_pip}{local_tf_pip}{extra_pip}'
         '{extra_docker_build_args} {suffix}').format(
             docker_base_cmd=docker_base_cmd,
             docker_tag=FLAGS.docker_tag,
             tf_pip=tf_pip,
             # local_tensorflow_pip_spec is either string 'EMPTY' or basename
             # of local .whl file.
             local_tf_pip=' --build-arg local_tensorflow_pip_spec={}'.format(
                 local_tensorflow_pip_spec),
             extra_pip=' --build-arg extra_pip_specs=\'{}\''.format(
                 extra_pip_specs),
             extra_docker_build_args=' ' + extra_docker_build_args,
             suffix='-f {} {}'.format(dockerfile_path, docker_context))

  utils.run_commands([cmd])
  logging.info('Built docker image with tag %s', FLAGS.docker_tag)
  setup_execution_time['build_docker'] = time.time() - docker_start_time