Code Example #1
 def RemoveRemoteFiles():
     # Nested test helper: `self` is the enclosing unittest.TestCase.
     gs_api = gsutil.GsutilApi()
     filenames = gs_api.List(
         gcloud_util.GcsPath(CLOUD_BASE_DIR, CLOUD_LOG_PATH))
     for filename in filenames:
         gs_api.DeleteFile(filename)
     self.assertFalse(
         gs_api.List(gcloud_util.GcsPath(CLOUD_BASE_DIR,
                                         CLOUD_LOG_PATH)))
Code Example #2
File: client.py Project: yhppark902/makani
    def _MakeConfigPackage(self, package_path):
        """Produces the config package.

    Args:
      package_path: Path for the output file.

    Returns:
      Number of configs generated.

    Raises:
      BatchSimClientError: The final simulation time is unset or is too large,
      or running with keep_h5_logs and use_local_worker but local_h5_logs_dir
      is unset.
    """
        self._packager.OpenPackage(package_path)

        num_configs = 0
        for i, config in enumerate(self._GenerateConfigs()):
            if 'sim_time' not in config['sim']:
                raise BatchSimClientError('Sim time is unset.')
            elif config['sim']['sim_time'] > FLAGS.max_sim_time:
                raise BatchSimClientError(
                    'Sim time (%s) exceeds the value of --max_sim_time (%s).' %
                    (config['sim']['sim_time'], FLAGS.max_sim_time))

            if FLAGS.use_local_worker:
                config['output_file_path'] = os.path.abspath(
                    os.path.join(self._local_output_dir, '%d.json' % i))
            else:
                # TODO: It's odd that this is the one case in which we use
                # the gs://makani/ prefix for a Cloud Storage path. It would
                # probably be better to include it universally, since knowing
                # whether a path is meant to be remote or local is useful
                # debugging information.
                config['output_file_path'] = ('gs://makani/%s/%d.json' %
                                              (self._gcs_output_dir, i))

            config['h5_keep_sparse_log_only'] = (FLAGS.keep_sparse_h5_logs
                                                 and not FLAGS.keep_h5_logs)
            config['h5_log_file_path'] = ''
            if FLAGS.keep_h5_logs or FLAGS.keep_sparse_h5_logs:
                if FLAGS.use_local_worker:
                    if FLAGS.local_h5_logs_dir:
                        config['h5_log_file_path'] = os.path.join(
                            FLAGS.local_h5_logs_dir, '%d.h5' % i)
                    else:
                        raise BatchSimClientError(
                            'local_h5_logs_dir is unset.')
                else:
                    config['h5_log_file_path'] = gcloud_util.GcsPath(
                        'gs://makani/', self._gcs_h5_log_dir, '%d.h5' % i)

            self._packager.AddString(
                json.dumps(config, indent=2, separators=(',', ': ')),
                'gce_config/%d.json' % i)
            num_configs += 1

        self._packager.ClosePackage()
        return num_configs
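
The sim_time validation at the top of the loop is self-contained enough to show in isolation. Below is a minimal sketch of the same rule, not project code: MAX_SIM_TIME stands in for FLAGS.max_sim_time, and CheckSimTime and the sample configs are hypothetical.

MAX_SIM_TIME = 600.0  # hypothetical stand-in for FLAGS.max_sim_time

def CheckSimTime(config):
    """Raises ValueError when sim_time is missing or exceeds the maximum."""
    if 'sim_time' not in config['sim']:
        raise ValueError('Sim time is unset.')
    if config['sim']['sim_time'] > MAX_SIM_TIME:
        raise ValueError('Sim time (%s) exceeds the maximum (%s).' %
                         (config['sim']['sim_time'], MAX_SIM_TIME))

CheckSimTime({'sim': {'sim_time': 120.0}})  # passes
try:
    CheckSimTime({'sim': {'sim_time': 1e6}})
except ValueError as e:
    print(e)  # Sim time (1000000.0) exceeds the maximum (600.0)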
Code Example #3
File: auto_upload.py Project: yhppark902/makani
def _GetRemoteFilename(local_dir, filename, dest_dir, rename_template=None):
    """Get name of the remote file to upload to.

  Args:
    local_dir: The local directory where the file resides.
    filename: The name of the file.
    dest_dir: The remote directory to upload to.
    rename_template: The template used to rename the file at the destination
        (default: None (preserve the original filename)).

  Returns:
    full_cloud_path: Full path to the cloud file to upload to.
  """
    renamed_filename = _RenameFile(filename, rename_template, local_dir)
    return gcloud_util.GcsPath(dest_dir, renamed_filename)
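
For orientation, a hedged usage sketch: with rename_template=None, _RenameFile preserves the filename, so the result is simply the GCS join of dest_dir and filename. The literal paths below are hypothetical.

# Hypothetical call; assumes rename_template=None preserves the filename.
remote = _GetRemoteFilename(
    local_dir='/tmp/logs/w7',
    filename='w7-2013.h5',
    dest_dir='gs://makani/sandbox/logs/w7/logs',
    rename_template=None)
# remote == 'gs://makani/sandbox/logs/w7/logs/w7-2013.h5'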
Code Example #4
 def AssertRemoteFiles():
     gs_api = gsutil.GsutilApi()
     filenames = gs_api.List(
         gcloud_util.GcsPath(CLOUD_BASE_DIR, CLOUD_LOG_PATH))
     prefix = os.path.join(CLOUD_BASE_DIR, CLOUD_LOG_PATH)
     self.assertEqual(
         set(filenames), {
             os.path.join(prefix,
                          'w7/logs/logs1-folder/subfolder/dummy.json'),
             os.path.join(
                 prefix,
                 'w7/logs/logs1-folder/another_subfolder/dummy.json'),
             os.path.join(
                 prefix,
                 'w7/logs/logs1-folder/one_more_subfolder/dummy.json'),
             os.path.join(prefix, 'w7/logs/w7-2013.h5'),
             os.path.join(prefix, 'M600A/logs/m600.h5'),
             os.path.join(prefix, 'M600A/logs/dummy.json'),
             os.path.join(prefix, 'w7/logs/logs1-folder/dummy.json'),
             os.path.join(prefix, 'w7/logs/logs1-folder/dummy.txt'),
         })
Code Example #5
  def Upload(self, preserve_local, clean_uploaded):
    """Attempt to upload files according to the configuration.

    Args:
      preserve_local: True if local files should not be removed after
          uploading.
      clean_uploaded: True if local files that are previously uploaded
          should be removed.

    Raises:
      SynchronizerError: If an issue was found with the configuration.

    Returns:
      A list of tuples, each tuple has the form of (local_filename,
          uploaded_filename).
    """
    uploaded_files = []
    for system in self._config['systems']:
      if FLAGS.systems and system not in FLAGS.systems:
        continue
      for collection in self._config['collections']:
        if FLAGS.collections and collection['name'] not in FLAGS.collections:
          continue
        path_string = os.path.join(
            self._config['local_basedir'], system, collection['local_path'])
        local_path_template = string.Template(path_string)
        try:
          local_path = local_path_template.substitute(os.environ)
        except KeyError as e:
          logging.error('Local path %s expects a missing environment '
                        'variable: %s', path_string, e)
          continue
        if not os.path.isdir(local_path):
          logging.debug('Skipped nonexistent local directory "%s".', local_path)
          continue
        else:
          logging.info('Uploading local directory "%s" for collection "%s".',
                       local_path, collection['name'])

        for source in collection['sources']:
          # Upload logs from one local directory to the cloud.
          dest_name = source.get('dest_name', None)
          dest_path = gcloud_util.GcsPath(self._config['remote_basedir'],
                                          system,
                                          collection['remote_path'])
          src_dir_pattern = source.get('src_dir_pattern', None)
          src_pattern = source.get('src_pattern', None)

          existing_dest_paths, gs_api = auto_upload.PrepareToUpload(
              local_path, dest_path)

          if src_dir_pattern:
            regex_file = re.compile(src_pattern) if src_pattern else None
            for directory, dirpath in auto_upload.IterDirectories(
                local_path, src_dir_pattern):
              auto_upload.TryUploadDirectory(
                  directory, dirpath, dest_path, regex_file,
                  dest_name, gs_api, preserve_local,
                  True, clean_uploaded, uploaded_files)
          elif src_pattern:
            for filename, dirpath in auto_upload.IterFiles(
                local_path, src_pattern):
              result = auto_upload.TryUploadFile(
                  filename, dirpath, dest_path, existing_dest_paths,
                  dest_name, gs_api, preserve_local, True, clean_uploaded)
              if result:
                uploaded_files.append(result)
          else:
            raise SynchronizerError('A source requires at least a src_pattern '
                                    'or src_dir_pattern.')
    return uploaded_files
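
The local-path expansion in Upload relies on string.Template.substitute, which raises KeyError for any referenced variable that is missing from the mapping; that is exactly what the except clause above catches. A minimal standalone sketch (the path template is hypothetical):

import os
import string

# substitute() raises KeyError if a referenced variable is unset.
template = string.Template('/data/${MAKANI_LOG_USER}/logs')  # hypothetical
try:
    path = template.substitute(os.environ)
except KeyError as e:
    print('Missing environment variable: %s' % e)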
Code Example #6
File: auto_upload.py Project: yhppark902/makani
def TryUploadDirectory(directory, parent_relpath, dest_dir, source_file_regex,
                       rename_template, gs_api, preserve_local, check_timing,
                       clean_uploaded, uploaded_files):
    """Attempt to upload a directory.

  Args:
    directory: The name of the directory.
    parent_relpath: The local directory where the directory resides.
    dest_dir: The remote directory to upload to.
    source_file_regex: The precompiled regular expression to test whether a file
        should be uploaded. If None, all files are uploaded.
        The regex is used to match the subpath within `directory`.
    rename_template: The template used to rename the file at the destination.
        If None, the original file name is preserved.
    gs_api: The gsutil.GsutilApi object.
    preserve_local: If True, the source files will remain after uploading.
    check_timing: If True, the upload will begin only if preconditions are met.
    clean_uploaded: True if a local log should be removed if the scan
        finds it is already uploaded.
    uploaded_files: A list of tuples, each has the form of (local_filename,
        uploaded_filename).

  Raises:
    BadTimeToUploadError: Raised if it is not the right time to upload.
  """
    base_relpath = os.path.join(parent_relpath, directory)

    renamed_directory = _RenameFile(directory, rename_template, parent_relpath)
    full_cloud_path = gcloud_util.GcsPath(dest_dir, renamed_directory)
    # Test whether any file exists with this prefix.
    # TODO: Could be made more efficient if there were an "Exists" call.
    is_new_path = not bool(gs_api.List(full_cloud_path))

    # Upload all files (except symbolic links) within the directory.
    # Do not rename any files within the directory.
    rename_template = None
    for sub_directory, sub_directories, files in os.walk(base_relpath):
        rel_path = os.path.relpath(sub_directory, base_relpath)
        if rel_path == '.':
            sub_cloud_directory = full_cloud_path
        else:
            sub_cloud_directory = gcloud_util.GcsPath(full_cloud_path,
                                                      rel_path)
        if is_new_path:
            existing_dest_paths = set()
        else:
            try:
                existing_dest_paths = set(gs_api.List(sub_cloud_directory))
            except httplib2.ServerNotFoundError:
                # The internet became unavailable; stop quietly.
                return
        # Files in upper-level directories are uploaded first; we assume
        # files in subdirectories take lower priority / are less interesting.
        # Within one directory, files tagged with larger timestamps are
        # uploaded first.
        files.sort(reverse=True)
        for filename in files:
            file_path = os.path.join(sub_directory, filename)
            rel_path = os.path.relpath(file_path, base_relpath)
            if source_file_regex and not source_file_regex.match(rel_path):
                continue

            try:
                result = TryUploadFile(filename, sub_directory,
                                       sub_cloud_directory,
                                       existing_dest_paths, rename_template,
                                       gs_api, preserve_local, check_timing,
                                       clean_uploaded)
            except BadTimeToUploadError:
                return
            else:
                if result:
                    uploaded_files.append(result)
        # Traverse directories with larger timestamps first.
        sub_directories.sort(reverse=True)
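
The trailing in-place sort of sub_directories is what steers the traversal order: in os.walk's default top-down mode, the directory list may be modified inside the loop body, and os.walk then descends in the resulting order. A minimal standalone demonstration (the root path is hypothetical):

import os

for dirpath, dirnames, filenames in os.walk('/tmp/example'):  # hypothetical root
    # Within a directory, visit later-named (e.g. newer-timestamped) files first.
    for name in sorted(filenames, reverse=True):
        print(os.path.join(dirpath, name))
    # os.walk consults this list after the loop body, so sorting it in place
    # makes the walk descend into later-named subdirectories first.
    dirnames.sort(reverse=True)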
Code Example #7
 def AssertNoRemoteFiles():
     gs_api = gsutil.GsutilApi()
     filenames = gs_api.List(
         gcloud_util.GcsPath(CLOUD_BASE_DIR, CLOUD_LOG_PATH))
     self.assertEqual(filenames, [])
Code Example #8
    def testUploadGoodTiming(self):
        def AssertRemoteFiles():
            gs_api = gsutil.GsutilApi()
            filenames = gs_api.List(
                gcloud_util.GcsPath(CLOUD_BASE_DIR, CLOUD_LOG_PATH))
            prefix = os.path.join(CLOUD_BASE_DIR, CLOUD_LOG_PATH)
            self.assertEqual(
                set(filenames), {
                    os.path.join(prefix,
                                 'w7/logs/logs1-folder/subfolder/dummy.json'),
                    os.path.join(
                        prefix,
                        'w7/logs/logs1-folder/another_subfolder/dummy.json'),
                    os.path.join(
                        prefix,
                        'w7/logs/logs1-folder/one_more_subfolder/dummy.json'),
                    os.path.join(prefix, 'w7/logs/w7-2013.h5'),
                    os.path.join(prefix, 'M600A/logs/m600.h5'),
                    os.path.join(prefix, 'M600A/logs/dummy.json'),
                    os.path.join(prefix, 'w7/logs/logs1-folder/dummy.json'),
                    os.path.join(prefix, 'w7/logs/logs1-folder/dummy.txt'),
                })

        def RemoveRemoteFiles():
            gs_api = gsutil.GsutilApi()
            filenames = gs_api.List(
                gcloud_util.GcsPath(CLOUD_BASE_DIR, CLOUD_LOG_PATH))
            for filename in filenames:
                gs_api.DeleteFile(filename)
            self.assertFalse(
                gs_api.List(gcloud_util.GcsPath(CLOUD_BASE_DIR,
                                                CLOUD_LOG_PATH)))

        test_config = self._LoadTestConfig()
        synchronizer = self._CreateLogSynchronizerFromJson(test_config)
        upload_expected = [
            # 1. Uploading *.h5 files from the first source.
            (os.path.join(self._test_dir, 'logs/M600A/m600.h5'),
             gcloud_util.GcsPath(CLOUD_BASE_DIR, CLOUD_LOG_PATH,
                                 'M600A/logs/m600.h5')),
            (os.path.join(self._test_dir, 'logs/w7/w7-2013.h5'),
             gcloud_util.GcsPath(CLOUD_BASE_DIR, CLOUD_LOG_PATH,
                                 'w7/logs/w7-2013.h5')),
            # 2. Uploading the `folder` directory from the second source.
            # The files are uploaded before subdirectories.
            # Files are uploaded in reverse alphabetical order.
            (os.path.join(self._test_dir, 'logs/w7/logs1/folder/dummy.txt'),
             gcloud_util.GcsPath(CLOUD_BASE_DIR, CLOUD_LOG_PATH,
                                 'w7/logs/logs1-folder/dummy.txt')),
            (os.path.join(self._test_dir, 'logs/w7/logs1/folder/dummy.json'),
             gcloud_util.GcsPath(CLOUD_BASE_DIR, CLOUD_LOG_PATH,
                                 'w7/logs/logs1-folder/dummy.json')),
            # Subdirectories are uploaded in reverse alphabetical order.
            (os.path.join(self._test_dir,
                          'logs/w7/logs1/folder/subfolder/dummy.json'),
             gcloud_util.GcsPath(CLOUD_BASE_DIR, CLOUD_LOG_PATH,
                                 'w7/logs/logs1-folder/subfolder/dummy.json')),
            (os.path.join(
                self._test_dir, 'logs/w7/logs1/folder/one_more_subfolder/'
                'dummy.json'),
             gcloud_util.GcsPath(
                 CLOUD_BASE_DIR, CLOUD_LOG_PATH,
                 'w7/logs/logs1-folder/one_more_subfolder/dummy.json')),
            (os.path.join(
                self._test_dir, 'logs/w7/logs1/folder/another_subfolder/'
                'dummy.json'),
             gcloud_util.GcsPath(
                 CLOUD_BASE_DIR, CLOUD_LOG_PATH,
                 'w7/logs/logs1-folder/another_subfolder/dummy.json')),
            # 3. Uploading dummy.json from the third source.
            (os.path.join(self._test_dir, 'logs/M600A/dummy.json'),
             gcloud_util.GcsPath(CLOUD_BASE_DIR, CLOUD_LOG_PATH,
                                 'M600A/logs/dummy.json')),
        ]

        all_files_to_upload = set(pair[0] for pair in upload_expected)
        with PatchCloudFakes(self._test_dir) as cloud_fake:
            with PatchEnvFakes(has_internet=True, is_idle=True):
                uploaded = synchronizer.Upload(preserve_local=True,
                                               clean_uploaded=False)
                uploaded.sort(key=lambda x: x[0])
                upload_expected.sort(key=lambda x: x[0])
                self.assertEqual(uploaded, upload_expected)
                AssertRemoteFiles()
                # Uploading again should result in no updates.
                uploaded = synchronizer.Upload(preserve_local=True,
                                               clean_uploaded=False)
                self.assertEqual(uploaded, [])
                AssertRemoteFiles()

                # Make a snapshot of files in the faked file system.
                local_files_copy = copy.copy(cloud_fake.GetLocalFiles())
                # If we remove remote files and retry, they should be uploaded again.
                RemoveRemoteFiles()
                uploaded = synchronizer.Upload(preserve_local=False,
                                               clean_uploaded=False)
                uploaded.sort(key=lambda x: x[0])
                upload_expected.sort(key=lambda x: x[0])
                self.assertEqual(uploaded, upload_expected)
                # However, local files should be deleted if preserve_local is False.
                self.assertFalse(all_files_to_upload
                                 & set(cloud_fake.GetLocalFiles()))

                # Now restore the faked file system as if no local files were
                # deleted.
                cloud_fake.SetLocalFiles(local_files_copy)
                # Upload again. No files should be uploaded because they are already
                # in the cloud, but local files should be deleted.
                uploaded = synchronizer.Upload(preserve_local=True,
                                               clean_uploaded=True)
                self.assertEqual(uploaded, [])
                self.assertFalse(all_files_to_upload
                                 & set(cloud_fake.GetLocalFiles()))
                # Make a snapshot of files in the faked cloud system.
                cloud_files_copy = copy.copy(cloud_fake.GetCloudFiles())

        with PatchCloudFakes(self._test_dir,
                             right_checksum=False) as cloud_fake:
            with PatchEnvFakes(has_internet=True, is_idle=True):
                # Restore the cloud file system as if files were already
                # uploaded.
                cloud_fake.SetCloudFiles(cloud_files_copy)
                AssertRemoteFiles()
                # No upload should be successful, due to the wrong checksum.
                uploaded = synchronizer.Upload(preserve_local=False,
                                               clean_uploaded=True)
                self.assertEqual(uploaded, [])
                AssertRemoteFiles()
                # All files should be preserved locally due to mismatched checksum.
                self.assertEqual(
                    all_files_to_upload & set(cloud_fake.GetLocalFiles()),
                    all_files_to_upload)
Code Example #9
import copy
import os
import string
import tempfile
import unittest

from makani.lib.log_synchronizer import auto_upload
from makani.lib.log_synchronizer import synchronizer as logsync
from makani.lib.python import gsutil
from makani.lib.python import gsutil_fakes
from makani.lib.python.batch_sim import gcloud_fakes
from makani.lib.python.batch_sim import gcloud_util
import mock

# The cloud folder that serves all log collections.
CLOUD_BUCKET = 'gs://makani'
CLOUD_ROOT_PATH = 'sandbox/logs'
CLOUD_BASE_DIR = gcloud_util.GcsPath(CLOUD_BUCKET, CLOUD_ROOT_PATH)
# The cloud folder to which the collection of logs is uploaded during tests.
CLOUD_LOG_PATH = 'cloud/subfolder/'


class PatchCloudFakes(object):
    """Patches to fake cloud storage needed for a log synchronizer unit test.

  Exclusively for use as a context manager.
  """
    def __init__(self, test_dir, right_checksum=True):
        """Creates the Patcher.

    Args:
      test_dir: The local directory containing the testing files.
      right_checksum: If False, the faked GsutilApi returns the