Example #1
    def rm_partition(self, partition):
        """
        Deletes the given partition entry from the bundle store, and purges the lru cache. Does not move any bundles.
        """

        if self.__get_num_partitions() == 1:
            """
            Prevent foot-shooting
            """
            print >>sys.stderr, "Error, cannot remove last partition. If you really wish to delete CodaLab, please run the following command:"
            print >>sys.stderr, "      rm -rf %s" % self.codalab_home
            return

        partition_abs_path = os.path.join(self.partitions, partition)

        try:
            print(partition_abs_path)
            path_util.check_isvalid(partition_abs_path, 'rm-partition')
        except:
            print >>sys.stderr, "Partition with name '%s' does not exist. Run `cl ls-partitions` to see a list of mounted partitions." % partition
            sys.exit(1)

        print >>sys.stderr, "Unlinking partition %s from CodaLab deployment..." % partition
        path_util.remove(partition_abs_path)
        nodes, _ = path_util.ls(self.partitions)
        self.nodes = nodes
        print >>sys.stderr, "Partition removed successfully from bundle store pool"
        print >>sys.stdout, "Warning: this does not affect the bundles in the removed partition or any entries in the bundle database"
        self.lru_cache = OrderedDict()
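Example #2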
    def upload_bundle(self, path, info, worksheet_uuid, follow_symlinks, exclude_patterns, add_to_worksheet):
        # URLs can be directly passed to the local client.
        if path and not isinstance(path, list) and path_util.path_is_url(path):
            return self.upload_bundle_url(path, info, worksheet_uuid, follow_symlinks, exclude_patterns)

        # First, zip path up (temporary local zip file).
        if path:
            name = info['metadata']['name']
            zip_path = zip_util.zip(path, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns, file_name=name)
            # Copy it up to the server (temporary remote zip file)
            with open(zip_path, 'rb') as source:
                remote_file_uuid = self.open_temp_file()
                dest = RPCFileHandle(remote_file_uuid, self.proxy)
                # FileServer does not expose an API for forcibly flushing writes, so
                # we rely on closing the file to flush it.
                file_util.copy(source, dest, autoflush=False, print_status='Uploading %s%s to %s' % (zip_path, ' ('+info['uuid']+')' if 'uuid' in info else '', self.address))
                dest.close()
        else:
            remote_file_uuid = None
            zip_path = None

        # Finally, install the zip file (this will be in charge of deleting that zip file).
        result = self.upload_bundle_zip(remote_file_uuid, info, worksheet_uuid, follow_symlinks, add_to_worksheet)

        if zip_path:
            path_util.remove(zip_path)  # Remove local zip
        return result
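Note: the upload path above follows the pattern shared by most snippets on this page: build a temporary artifact, hand it off, then delete it with path_util.remove. Below is a minimal, self-contained sketch of that cleanup contract using only the standard library; shutil and tempfile stand in for CodaLab's path_util helpers, and the build/consume callables are hypothetical placeholders.

import shutil
import tempfile

def with_temp_artifact(build, consume):
    # Create a scratch directory, let build() write an artifact into it,
    # let consume() use that artifact, and always clean up afterwards.
    temp_dir = tempfile.mkdtemp()
    try:
        artifact_path = build(temp_dir)   # e.g. write a zip file under temp_dir
        return consume(artifact_path)     # e.g. stream it to the server
    finally:
        # Plays the role of the path_util.remove(...) calls above, but is
        # guaranteed to run even if the hand-off raises.
        shutil.rmtree(temp_dir, ignore_errors=True)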
Example #3
 def reset(self):
     """
     Delete all stored bundles and then recreate the root directories.
     """
     # Do not run this function in production!
     path_util.remove(self.partitions)
     self.initialize_store()
Example #4
 def finalize_file(self, file_uuid, delete):
     '''
     Remove the record from the file server.
     '''
     path = self.file_paths.pop(file_uuid)
     file_handle = self.file_handles.pop(file_uuid, None)
     if delete and path: path_util.remove(path)
Example #5
    def rm_partition(self, partition):
        """
        Deletes the given partition entry from the bundle store, and purges the lru cache. Does not move any bundles.
        """

        if self.__get_num_partitions() == 1:
            """
            Prevent foot-shooting
            """
            print >> sys.stderr, "Error, cannot remove last partition. If you really wish to delete CodaLab, please run the following command:"
            print >> sys.stderr, "      rm -rf %s" % self.codalab_home
            return

        partition_abs_path = os.path.join(self.partitions, partition)

        try:
            print(partition_abs_path)
            path_util.check_isvalid(partition_abs_path, 'rm-partition')
        except:
            print >> sys.stderr, "Partition with name '%s' does not exist. Run `cl ls-partitions` to see a list of mounted partitions." % partition
            sys.exit(1)

        print >> sys.stderr, "Unlinking partition %s from CodaLab deployment..." % partition
        path_util.remove(partition_abs_path)
        nodes, _ = path_util.ls(self.partitions)
        self.nodes = nodes
        print >> sys.stderr, "Partition removed successfully from bundle store pool"
        print >> sys.stdout, "Warning: this does not affect the bundles in the removed partition or any entries in the bundle database"
        self.lru_cache = OrderedDict()
Example #6
def zip(path):
  '''
  Take a path to a file or directory and return the path to a zip archive
  containing its contents.
  '''
  absolute_path = path_util.normalize(path)
  path_util.check_isvalid(absolute_path, 'zip_directory')
  # Recursively copy the directory into a temp directory.
  temp_path = tempfile.mkdtemp()
  temp_subpath = os.path.join(temp_path, ZIP_SUBPATH)
  path_util.copy(absolute_path, temp_subpath)
  # Multiplex between zipping a directory and zipping a file here, because
  # make_archive does NOT handle the file case cleanly.
  if os.path.isdir(temp_subpath):
    zip_path = shutil.make_archive(
      base_name=temp_path,
      base_dir=ZIP_SUBPATH,
      root_dir=temp_path,
      format='zip',
    )
  else:
    zip_path = temp_path + '.zip'
    with ZipFile(zip_path, 'w') as zip_file:
      zip_file.write(temp_subpath, ZIP_SUBPATH)
  # Clean up the temporary directory and return the zip file's path.
  path_util.remove(temp_path)
  return zip_path
Example #7
    def finalize_bundle(self, bundle):
        if self.verbose >= 1: print '=== finalize_bundle(%s)' % bundle.uuid
        if not self._exists(bundle): return True

        try:
            args = self.dispatch_command.split() + ['cleanup', bundle.metadata.job_handle]
            result = self.run_command_get_stdout_json(args)
            # Sync this with files created in start_bundle
            temp_dir = bundle.metadata.temp_dir
            if getattr(bundle.metadata, 'docker_image', None):
                container_file = temp_dir + '.cid'
                action_file = temp_dir + '.action'
                status_dir = temp_dir + '.status'
                script_file = temp_dir + '.sh'
                internal_script_file = temp_dir + '-internal.sh'
                temp_files = [container_file, action_file, status_dir, script_file, internal_script_file]
            else:
                script_file = temp_dir + '.sh'
                temp_files = [script_file]
            for f in temp_files:
                if os.path.exists(f):
                    path_util.remove(f)
            ok = True
        except Exception, e:
            print '=== INTERNAL ERROR: %s' % e
            traceback.print_exc()
            ok = False
Example #8
 def reset(self):
     """
     Delete all stored bundles and then recreate the root directories.
     """
     # Do not run this function in production!
     path_util.remove(self.partitions)
     self.initialize_store()
Example #9
 def finalize_file(self, file_uuid, delete):
     '''
     Remove the record from the file server.
     '''
     path = self.file_paths.pop(file_uuid)
     file_handle = self.file_handles.pop(file_uuid, None)
     if delete and path: path_util.remove(path)
Example #10
 def upload_bundle_zip(self, file_uuid, construct_args, worksheet_uuid, follow_symlinks, add_to_worksheet):
     '''
     |file_uuid| specifies a pointer to the temporary file X.
     - If X is a non-zip file, then just upload X as an ordinary file.
     - If X is a zip file containing one file/directory Y representing a bundle, then upload Y.
     - If X is a zip file containing multiple files/directories, then upload X.
     Return the new bundle's uuid.
     Note: delete the file_uuid file and X if needed (these are temporary files).
     '''
     if file_uuid:
         orig_path = self.file_paths[file_uuid]  # Note: cheat and look at file_server's data
         precondition(orig_path, 'Unexpected file uuid: %s' % (file_uuid,))
         if zip_util.is_zip_file(orig_path):
             container_path = tempfile.mkdtemp()  # Make temporary directory
             zip_util.unzip(orig_path, container_path, file_name=None)  # Unzip into a directory
             # If the container path only has one item, then make that the final path
             sub_files = os.listdir(container_path)
             if len(sub_files) == 1:
                 final_path = os.path.join(container_path, sub_files[0])
             else:  # Otherwise, use the container path
                 final_path = container_path
                 container_path = None
         else:
             # Not a zip file!  Just upload it normally as a file.
             final_path = orig_path
             container_path = None  # Don't need to delete
     else:
         final_path = None
     result = self.client.upload_bundle(final_path, construct_args, worksheet_uuid, follow_symlinks, exclude_patterns=[], add_to_worksheet=add_to_worksheet)
     if file_uuid:
         if container_path:
             path_util.remove(container_path)  # Remove temporary directory
         self.finalize_file(file_uuid, final_path != orig_path)  # Remove temporary file
     return result
Example #11
    def upload_bundle(self, path, info, worksheet_uuid, follow_symlinks):
        # URLs can be directly passed to the local client.
        if path and not isinstance(path, list) and path_util.path_is_url(path):
            return self.upload_bundle_url(path, info, worksheet_uuid, follow_symlinks)

        # First, zip path up (temporary local zip file).
        if path:
            zip_path, sub_path = zip_util.zip(path, follow_symlinks=follow_symlinks)
            # Copy it up to the server (temporary remote zip file)
            with open(zip_path, 'rb') as source:
                remote_file_uuid = self.open_temp_file()
                dest = RPCFileHandle(remote_file_uuid, self.proxy)
                # FileServer does not expose an API for forcibly flushing writes, so
                # we rely on closing the file to flush it.
                file_util.copy(source, dest, autoflush=False, print_status=True)
                dest.close()
        else:
            remote_file_uuid = None
            zip_path = None

        # Finally, install the zip file (this will be in charge of deleting that zip file).
        result = self.upload_bundle_zip(remote_file_uuid, info, worksheet_uuid, follow_symlinks)

        if zip_path:
            path_util.remove(zip_path)  # Remove local zip
        return result
Example #12
    def finalize_bundle(self, bundle):
        if self.verbose >= 1: print '=== finalize_bundle(%s)' % bundle.uuid
        if not self._exists(bundle): return True

        try:
            args = self.dispatch_command.split() + [
                'cleanup', bundle.metadata.job_handle
            ]
            result = self.run_command_get_stdout_json(args)
            # Sync this with files created in start_bundle
            temp_dir = bundle.metadata.temp_dir
            if getattr(bundle.metadata, 'docker_image', None):
                container_file = temp_dir + '.cid'
                action_file = temp_dir + '.action'
                status_dir = temp_dir + '.status'
                script_file = temp_dir + '.sh'
                internal_script_file = temp_dir + '-internal.sh'
                temp_files = [
                    container_file, action_file, status_dir, script_file,
                    internal_script_file
                ]
            else:
                script_file = temp_dir + '.sh'
                temp_files = [script_file]
            for f in temp_files:
                if os.path.exists(f):
                    path_util.remove(f)
            ok = True
        except Exception, e:
            print '=== INTERNAL ERROR: %s' % e
            traceback.print_exc()
            ok = False
Example #13
def zip(path, follow_symlinks, exclude_names=[], file_name=None):
    '''
    Take a path to a file or directory and return the path to a zip archive
    containing its contents.
    '''
    if isinstance(path, list):
        for p in path:
            absolute_path = path_util.normalize(p)
            path_util.check_isvalid(absolute_path, 'zip_directory')
    else:
        absolute_path = path_util.normalize(path)
        path_util.check_isvalid(absolute_path, 'zip_directory')

    # Add proper name
    if file_name:
        sub_path = file_name
    else:
        sub_path = ZIP_SUBPATH

    # Recursively copy the directory into a temp directory.
    temp_path = tempfile.mkdtemp()
    temp_subpath = os.path.join(temp_path, sub_path)

    # TODO: this is inefficient; do the zipping from the original source
    # directly.
    if isinstance(path, list):
        os.mkdir(temp_subpath)
        for p in path:
            absolute_path = path_util.normalize(p)
            path_util.copy(absolute_path, os.path.join(temp_subpath, os.path.basename(p)), follow_symlinks=follow_symlinks, exclude_names=exclude_names)
    else:
        absolute_path = path_util.normalize(path)
        path_util.copy(absolute_path, temp_subpath, follow_symlinks=follow_symlinks, exclude_names=exclude_names)

    # TODO: These methods of zipping don't preserve permissions, so using a
    # system call for now (only works in Linux)
    # Multiplex between zipping a directory and zipping a file here, because
    # make_archive does NOT handle the file case cleanly.
    #if os.path.isdir(temp_subpath):
    #    zip_path = shutil.make_archive(
    #      base_name=temp_path,
    #      base_dir=ZIP_SUBPATH,
    #      root_dir=temp_path,
    #      format='zip',
    #    )
    #else:
    #    zip_path = temp_path + '.zip'
    #    with ZipFile(zip_path, 'w') as zip_file:
    #        zip_file.write(temp_subpath, ZIP_SUBPATH)
    # Clean up the temporary directory and return the zip file's path.

    zip_path = temp_path + '.zip'
    opts = '-qr'
    if not follow_symlinks: opts += ' --symlinks'
    if os.system("cd %s && zip %s %s %s" % (temp_path, opts, zip_path, sub_path)) != 0:
        raise UsageError('zip failed')

    path_util.remove(temp_path)
    return zip_path, sub_path
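The zip step above shells out with os.system and string formatting. A safer sketch of the same step (hypothetical, not taken from the CodaLab source) passes an argument list to subprocess and uses cwd instead of "cd ... &&":

import subprocess

def zip_directory(temp_path, zip_path, sub_path, follow_symlinks):
    args = ['zip', '-qr']
    if not follow_symlinks:
        args.append('--symlinks')  # store symlinks rather than following them
    args += [zip_path, sub_path]
    # Running from temp_path keeps sub_path at the root of the archive,
    # mirroring the "cd %s && zip %s %s %s" command above.
    if subprocess.call(args, cwd=temp_path) != 0:
        # RuntimeError stands in for CodaLab's UsageError here.
        raise RuntimeError('zip failed')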
Example #14
 def cleanup(self, uuid, dry_run):
     '''
     Remove the bundle with given UUID from on-disk storage.
     '''
     absolute_path = self.get_bundle_location(uuid)
     print >>sys.stderr, "cleanup: data %s" % absolute_path
     if not dry_run:
         path_util.remove(absolute_path)
Example #15
 def _reset(self):
     '''
     Delete all stored bundles and then recreate the root directories.
     '''
     # Do not run this function in production!
     path_util.remove(self.data)
     path_util.remove(self.temp)
     self.make_directories()
Example #16
 def _reset(self):
     '''
     Delete all stored bundles and then recreate the root directories.
     '''
     # Do not run this function in production!
     path_util.remove(self.data)
     path_util.remove(self.temp)
     self.make_directories()
Example #17
 def cleanup(self, uuid, dry_run):
     '''
     Remove the bundle with given UUID from on-disk storage.
     '''
     absolute_path = self.get_bundle_location(uuid)
     print("cleanup: data %s" % absolute_path, file=sys.stderr)
     if not dry_run:
         path_util.remove(absolute_path)
Example #18
 def cleanup(self, model, data_hash):
     '''
     If the given data hash is not needed for any bundle, delete its data.
     '''
     bundles = model.batch_get_bundles(data_hash=data_hash)
     if not bundles:
         absolute_path = self.get_location(data_hash)
         path_util.remove(absolute_path)
Example #19
 def _unpack_file(self, source_path, dest_path, remove_source,
                  simplify_archive):
     zip_util.unpack(zip_util.get_archive_ext(source_path), source_path,
                     dest_path)
     if remove_source:
         path_util.remove(source_path)
     if simplify_archive:
         self._simplify_archive(dest_path)
Example #20
 def remove_dependencies(self, bundle_store, parent_dict, dest_path):
     '''
     Remove dependencies (for RunBundles).
     '''
     precondition(os.path.isabs(dest_path), '%s is a relative path!' % (dest_path,))
     pairs = self.get_dependency_paths(bundle_store, parent_dict, dest_path, relative_symlinks=False)
     for (target, link_path) in pairs:
         # If the dependency already exists, remove it (this happens when we are reinstalling)
         if os.path.exists(link_path):
             path_util.remove(link_path)
Example #21
    def add_partition(self, target, new_partition_name):
        """
        MultiDiskBundleStore specific method. Add a new partition to the bundle store. The "target" is actually a symlink to
        the target directory, which the user has configured as the mountpoint for some desired partition.

        First, all bundles that are to be relocated onto the new partition are copied to a temp location to be resilient
        against failures. After the copy is performed, the bundles are subsequently moved to the new partition, and finally
        the original copies of the bundles are deleted from their old locations.
        """
        target = os.path.abspath(target)
        new_partition_location = os.path.join(self.partitions, new_partition_name)

        mtemp = os.path.join(target, MultiDiskBundleStore.TEMP_SUBDIRECTORY)

        try:
            path_util.make_directory(mtemp)
        except:
            print >> sys.stderr, "Could not make directory %s on partition %s, aborting" % (mtemp, target)
            sys.exit(1)

        self.ring.add_node(new_partition_name)  # Add the node to the partition locations
        delete_on_success = []  # Paths to bundles that will be deleted after the copy finishes successfully

        print >> sys.stderr, "Marking bundles for placement on new partition %s (might take a while)" % new_partition_name
        # For each bundle in the bundle store, check to see if any hash to the new partition. If so move them over
        partitions, _ = path_util.ls(self.partitions)
        for partition in partitions:
            partition_abs_path = os.path.join(self.partitions, partition, MultiDiskBundleStore.DATA_SUBDIRECTORY)
            bundles = reduce(lambda dirs, files: dirs + files, path_util.ls(partition_abs_path))
            for bundle in bundles:
                correct_partition = self.ring.get_node(bundle)
                if correct_partition != partition:
                    # Reposition the node to the correct partition
                    from_path = os.path.join(self.partitions, partition, MultiDiskBundleStore.DATA_SUBDIRECTORY, bundle)
                    to_path = os.path.join(mtemp, bundle)
                    print >> sys.stderr, "copying %s to %s" % (from_path, to_path)
                    path_util.copy(from_path, to_path)
                    delete_on_success += [from_path]

        print >> sys.stderr, "Adding new partition as %s..." % new_partition_location
        path_util.soft_link(target, new_partition_location)

        # Atomically move the temp location to the new partition's mdata
        new_mdata = os.path.join(new_partition_location, MultiDiskBundleStore.DATA_SUBDIRECTORY)
        new_mtemp = os.path.join(new_partition_location, MultiDiskBundleStore.TEMP_SUBDIRECTORY)
        path_util.rename(new_mtemp, new_mdata)
        path_util.make_directory(new_mtemp)

        # Go through and purge all of the originals at this time
        print >> sys.stderr, "Cleaning up drives..."
        for to_delete in delete_on_success:
            path_util.remove(to_delete)

        print >> sys.stderr, "Successfully added partition '%s' to the pool." % new_partition_name
Example #22
 def cleanup(self, model, data_hash, except_bundle_uuids, dry_run):
     '''
     If the given data hash is not needed by any bundle (not in
     except_bundle_uuids), delete the data.
     '''
     bundles = model.batch_get_bundles(data_hash=data_hash)
     if all(bundle.uuid in except_bundle_uuids for bundle in bundles):
         absolute_path = self.get_location(data_hash)
         print >> sys.stderr, "cleanup: data %s" % absolute_path
         if not dry_run:
             path_util.remove(absolute_path)
Example #23
 def cleanup(self, model, data_hash, except_bundle_uuids, dry_run):
     '''
     If the given data hash is not needed by any bundle (not in
     except_bundle_uuids), delete the data.
     '''
     bundles = model.batch_get_bundles(data_hash=data_hash)
     if all(bundle.uuid in except_bundle_uuids for bundle in bundles):
         absolute_path = self.get_location(data_hash)
         print >>sys.stderr, "cleanup: data %s" % absolute_path
         if not dry_run:
             path_util.remove(absolute_path)
Example #24
 def full_cleanup(self, model):
     '''
     For each data hash in the store, check if it should be garbage collected and
     delete its data if so. In addition, delete any old temporary files.
     '''
     old_data_files = self.list_old_files(self.data, self.DATA_CLEANUP_TIME)
     for data_hash in old_data_files:
         self.cleanup(model, data_hash)
     old_temp_files = self.list_old_files(self.temp, self.TEMP_CLEANUP_TIME)
     for temp_file in old_temp_files:
         temp_path = os.path.join(self.temp, temp_file)
         path_util.remove(temp_path)
Example #25
    def _simplify_directory(self, path, child_path=None):
        """
        Modifies |path| in place: If the |path| directory contains exactly
        one file / directory, then replace |path| with that file / directory.
        """
        if child_path is None:
            child_path = os.listdir(path)[0]

        temp_path = path + crypt_util.get_random_string()
        path_util.rename(path, temp_path)
        child_path = os.path.join(temp_path, child_path)
        path_util.rename(child_path, path)
        path_util.remove(temp_path)
Example #26
    def finalize_bundle(self, bundle):
        if not self.bundle or self.bundle.uuid != bundle.uuid: return False

        try:
            script_file = self.temp_dir + '.sh'
            for f in [script_file]:
                if os.path.exists(f):
                    path_util.remove(f)
            ok = True
        except Exception, e:
            print >>sys.stderr, '=== INTERNAL ERROR: %s' % e
            traceback.print_exc()
            ok = False
Example #27
    def finalize_bundle(self, bundle):
        if not self.bundle or self.bundle.uuid != bundle.uuid: return False

        try:
            script_file = self.temp_dir + '.sh'
            for f in [script_file]:
                if os.path.exists(f):
                    path_util.remove(f)
            ok = True
        except Exception, e:
            print '=== INTERNAL ERROR: %s' % e
            traceback.print_exc()
            ok = False
Example #28
    def _simplify_directory(self, path, child_path=None):
        """
        Modifies |path| in place: If the |path| directory contains exactly
        one file / directory, then replace |path| with that file / directory.
        """
        if child_path is None:
            child_path = os.listdir(path)[0]

        temp_path = path + crypt_util.get_random_string()
        path_util.rename(path, temp_path)
        child_path = os.path.join(temp_path, child_path)
        path_util.rename(child_path, path)
        path_util.remove(temp_path)
Example #29
 def full_cleanup(self, model, dry_run):
     """
     For each data hash in the store, check if it should be garbage collected and
     delete its data if so. In addition, delete any old temporary files.
     """
     old_data_files = self.list_old_files(self.data, self.DATA_CLEANUP_TIME)
     for data_hash in old_data_files:
         self.cleanup(model, data_hash, [], dry_run)
     old_temp_files = self.list_old_files(self.temp, self.TEMP_CLEANUP_TIME)
     for temp_file in old_temp_files:
         temp_path = os.path.join(self.temp, temp_file)
         print >>sys.stderr, "cleanup: temp %s" % temp_path
         if not dry_run:
             path_util.remove(temp_path)
Example #30
 def remove_dependencies(self, bundle_store, parent_dict, dest_path):
     '''
     Remove dependencies (for RunBundles).
     '''
     precondition(os.path.isabs(dest_path),
                  '%s is a relative path!' % (dest_path, ))
     pairs = self.get_dependency_paths(bundle_store,
                                       parent_dict,
                                       dest_path,
                                       relative_symlinks=False)
     for (target, link_path) in pairs:
         # If the dependency already exists, remove it (this happens when we are reinstalling)
         if os.path.exists(link_path):
             path_util.remove(link_path)
Example #31
 def full_cleanup(self, model, dry_run):
     '''
     For each data hash in the store, check if it should be garbage collected and
     delete its data if so. In addition, delete any old temporary files.
     '''
     old_data_files = self.list_old_files(self.data, self.DATA_CLEANUP_TIME)
     for data_hash in old_data_files:
         self.cleanup(model, data_hash, [], dry_run)
     old_temp_files = self.list_old_files(self.temp, self.TEMP_CLEANUP_TIME)
     for temp_file in old_temp_files:
         temp_path = os.path.join(self.temp, temp_file)
         print >> sys.stderr, "cleanup: temp %s" % temp_path
         if not dry_run:
             path_util.remove(temp_path)
Example #32
def open_and_edit(suffix, template=''):
    editor = find_default_editor()
    tempfile_name = ''
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as form:
        form.write(template)
        form.flush()
        tempfile_name = form.name
    lines = ''
    if os.path.isfile(tempfile_name):
        subprocess.call([editor, tempfile_name])
        with open(tempfile_name, 'rb') as form:
            lines = form.readlines()
        path_util.remove(tempfile_name)

    return lines
Example #33
def open_and_edit(suffix, template=''):
    editor = find_default_editor()
    tempfile_name = ''
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as form:
        form.write(template.encode('utf-8'))
        form.flush()
        tempfile_name = form.name
    lines = ''
    if os.path.isfile(tempfile_name):
        subprocess.call([editor, tempfile_name])
        with open(tempfile_name, 'r') as form:
            lines = form.readlines()
        path_util.remove(tempfile_name)

    return lines
Example #34
def unpack(ext, source, dest_path):
    """
    Unpack the archive |source| to |dest_path|.
    Note: |source| can be a file handle or a path.
    |ext| contains the extension of the archive.
    """
    if ext != '.zip':
        close_source = False
        try:
            if isinstance(source, str):
                source = open(source, 'rb')
                close_source = True

            if ext == '.tar.gz' or ext == '.tgz':
                un_tar_directory(source, dest_path, 'gz')
            elif ext == '.tar.bz2':
                un_tar_directory(source, dest_path, 'bz2')
            elif ext == '.bz2':
                un_bz2_file(source, dest_path)
            elif ext == '.gz':
                with open(dest_path, 'wb') as f:
                    shutil.copyfileobj(un_gzip_stream(source), f)
            else:
                raise UsageError('Not an archive.')
        except (tarfile.TarError, IOError):
            raise UsageError('Invalid archive upload.')
        finally:
            if close_source:
                source.close()
    else:
        delete_source = False
        try:
            # unzip doesn't accept input from standard input, so we have to save
            # to a temporary file.
            if not isinstance(source, str):
                temp_path = dest_path + '.zip'
                with open(temp_path, 'wb') as f:
                    shutil.copyfileobj(source, f)
                source = temp_path
                delete_source = True

            exitcode = subprocess.call(
                ['unzip', '-q', source, '-d', dest_path])
            if exitcode != 0:
                raise UsageError('Invalid archive upload.')
        finally:
            if delete_source:
                path_util.remove(source)
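The extension dispatch in unpack() above has a close analogue in Python 3's shutil.unpack_archive, which also picks the extractor from the file name. A rough, self-contained equivalent for the path-input case only (the file-handle branch, the plain .gz case, and CodaLab's UsageError are omitted):

import shutil

def unpack_path(source_path, dest_path):
    try:
        # shutil infers the format (.zip, .tar.gz, .tar.bz2, ...) from the
        # extension, much like the ext checks above.
        shutil.unpack_archive(source_path, dest_path)
    except shutil.ReadError:
        # RuntimeError stands in for CodaLab's UsageError here.
        raise RuntimeError('Invalid archive upload: %s' % source_path)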
Example #35
 def install_dependencies(self, bundle_store, parent_dict, dest_path, copy):
     '''
     Symlink or copy this bundle's dependencies into the directory at dest_path.
     The caller is responsible for cleaning up this directory.
     '''
     precondition(os.path.isabs(dest_path), '%s is a relative path!' % (dest_path,))
     pairs = self.get_dependency_paths(bundle_store, parent_dict, dest_path, relative_symlinks=not copy)
     for (target, link_path) in pairs:
         # If the dependency already exists, remove it (this happens when we are reinstalling)
         if os.path.exists(link_path):
             path_util.remove(link_path)
         # Either copy (but not follow further symlinks) or symlink.
         if copy:
             path_util.copy(target, link_path, follow_symlinks=False)
         else:
             os.symlink(target, link_path)
Example #36
def unpack(ext, source, dest_path):
    """
    Unpack the archive |source| to |dest_path|.
    Note: |source| can be a file handle or a path.
    |ext| contains the extension of the archive.
    """
    if ext != '.zip':
        close_source = False
        try:
            if isinstance(source, basestring):
                source = open(source, 'rb')
                close_source = True

            if ext == '.tar.gz' or ext == '.tgz':
                un_tar_directory(source, dest_path, 'gz')
            elif ext == '.tar.bz2':
                un_tar_directory(source, dest_path, 'bz2')
            elif ext == '.bz2':
                un_bz2_file(source, dest_path)
            elif ext == '.gz':
                with open(dest_path, 'wb') as f:
                    shutil.copyfileobj(un_gzip_stream(source), f)
            else:
                raise UsageError('Not an archive.')
        except (tarfile.TarError, IOError):
            raise UsageError('Invalid archive upload.')
        finally:
            if close_source:
                source.close()
    else:
        delete_source = False
        try:
            # unzip doesn't accept input from standard input, so we have to save
            # to a temporary file.
            if not isinstance(source, basestring):
                temp_path = dest_path + '.zip'
                with open(temp_path, 'wb') as f:
                    shutil.copyfileobj(source, f)
                source = temp_path
                delete_source = True

            exitcode = subprocess.call(['unzip', '-q', source, '-d', dest_path])
            if exitcode != 0:
                raise UsageError('Invalid archive upload.')
        finally:
            if delete_source:
                path_util.remove(source)
Example #37
 def run_bundle(self, bundle):
   '''
   Run the given bundle and then update its state to be either READY or FAILED.
   If the bundle is now READY, its data_hash should be set.
   '''
   # Check that we're running a bundle in the RUNNING state.
   state_message = 'Unexpected bundle state: %s' % (bundle.state,)
   precondition(bundle.state == State.RUNNING, state_message)
   data_hash_message = 'Unexpected bundle data_hash: %s' % (bundle.data_hash,)
   precondition(bundle.data_hash is None, data_hash_message)
   # Compute a dict mapping parent_uuid -> parent for each dep of this bundle.
   parent_uuids = set(dep.parent_uuid for dep in bundle.dependencies)
   parents = self.model.batch_get_bundles(uuid=parent_uuids)
   parent_dict = {parent.uuid: parent for parent in parents}
   # Create a scratch directory to run the bundle in.
   with self.profile('Creating temp directory...'):
     temp_dir = tempfile.mkdtemp()
   # Run the bundle. Mark it READY if it is successful and FAILED otherwise.
   with self.profile('Running bundle...'):
     print '\n-- Run started! --\nRunning %s.' % (bundle,)
     try:
       (data_hash, metadata) = bundle.run(
         self.bundle_store, parent_dict, temp_dir)
       self.finalize_run(bundle, State.READY, data_hash, metadata)
       print 'Got data hash: %s\n-- Success! --\n' % (data_hash,)
     except Exception:
       # TODO(skishore): Add metadata updates: time / CPU of run.
       (type, error, tb) = sys.exc_info()
       with self.profile('Uploading failed bundle...'):
         (data_hash, metadata) = self.upload_failed_bundle(error, temp_dir)
       failure_message = '%s: %s' % (error.__class__.__name__, error)
       if data_hash:
         suffix = 'The results of the failed execution were uploaded.'
         failure_message = '%s\n%s' % (failure_message, suffix)
       elif not isinstance(error, UsageError):
         failure_message = 'Traceback:\n%s\n%s' % (
           ''.join(traceback.format_tb(tb))[:-1],
           failure_message,
         )
       metadata.update({'failure_message': failure_message})
       self.finalize_run(bundle, State.FAILED, data_hash, metadata)
       print '-- FAILED! --\n%s\n' % (failure_message,)
   # Clean up after the run.
   with self.profile('Cleaning up temp directory...'):
     path_util.remove(temp_dir)
Example #38
 def upload_bundle_zip(self, file_uuid, construct_args, worksheet_uuid, follow_symlinks):
     '''
     Unzip the zip in the temp file identified by the given file uuid and then
     upload the unzipped directory. Return the new bundle's id.
     Note: delete the file_uuid file, because it's temporary!
     '''
     if file_uuid:
         zip_path = self.file_paths[file_uuid]  # Note: cheat and look at file_server's data
         precondition(zip_path, 'Unexpected file uuid: %s' % (file_uuid,))
         container_path = tempfile.mkdtemp()  # Make temporary directory
         path = zip_util.unzip(zip_path, container_path)  # Unzip
     else:
         path = None
     result = self.client.upload_bundle(path, construct_args, worksheet_uuid, follow_symlinks)
     if file_uuid:
         path_util.remove(container_path)  # Remove temporary directory
         self.finalize_file(file_uuid, True)  # Remove temporary zip
     return result
Example #39
def zip(path, follow_symlinks, exclude_patterns, file_name):
    '''
    Take a path to a file or directory |path| and return the path to a zip archive
    containing its contents.  |file_name| is what the zip archive contains.
    '''
    if isinstance(path, list):
        for p in path:
            absolute_path = path_util.normalize(p)
            path_util.check_isvalid(absolute_path, 'zip_directory')
    else:
        absolute_path = path_util.normalize(path)
        path_util.check_isvalid(absolute_path, 'zip_directory')

    # Recursively copy the directory into a temp directory.
    temp_path = tempfile.mkdtemp()
    temp_subpath = os.path.join(temp_path, file_name)

    print_util.open_line('Copying %s to %s' % (path, temp_subpath))
    if isinstance(path, list):
        os.mkdir(temp_subpath)
        for p in path:
            absolute_path = path_util.normalize(p)
            path_util.copy(absolute_path,
                           os.path.join(temp_subpath, os.path.basename(p)),
                           follow_symlinks=follow_symlinks,
                           exclude_patterns=exclude_patterns)
    else:
        absolute_path = path_util.normalize(path)
        path_util.copy(absolute_path,
                       temp_subpath,
                       follow_symlinks=follow_symlinks,
                       exclude_patterns=exclude_patterns)
    print_util.clear_line()

    zip_path = temp_path + '.zip'
    opts = '-qr'
    if not follow_symlinks: opts += ' --symlinks'
    print_util.open_line('Zipping to %s' % zip_path)
    if os.system("cd %s && zip %s %s %s" %
                 (temp_path, opts, zip_path, file_name)) != 0:
        raise UsageError('zip failed')

    path_util.remove(temp_path)
    return zip_path
Example #40
    def run_bundle(self, bundle):
        """
        Run the given bundle and then update its state to be either READY or FAILED.
        If the bundle is now READY, its data_hash should be set.
        """
        # Check that we're running a bundle in the RUNNING state.
        state_message = "Unexpected bundle state: %s" % (bundle.state,)
        precondition(bundle.state == State.RUNNING, state_message)
        data_hash_message = "Unexpected bundle data_hash: %s" % (bundle.data_hash,)
        precondition(bundle.data_hash is None, data_hash_message)
        # Compute a dict mapping parent_uuid -> parent for each dep of this bundle.
        parent_uuids = set(dep.parent_uuid for dep in bundle.dependencies)
        parents = self.model.batch_get_bundles(uuid=parent_uuids)
        parent_dict = {parent.uuid: parent for parent in parents}

        # Get temp directory
        temp_dir = canonicalize.get_current_location(self.bundle_store, bundle.uuid)

        # Run the bundle. Mark it READY if it is successful and FAILED otherwise.
        with self.profile("Running bundle..."):
            print "-- START RUN: %s" % (bundle,)
            try:
                (data_hash, metadata) = bundle.run(self.bundle_store, parent_dict, temp_dir)
                state = State.READY
            except Exception:
                # TODO(pliang): distinguish between internal CodaLab error and the program failing
                # TODO(skishore): Add metadata updates: time / CPU of run.
                (type, error, tb) = sys.exc_info()
                with self.profile("Uploading failed bundle..."):
                    (data_hash, metadata) = self.upload_failed_bundle(error, temp_dir)
                failure_message = "%s: %s" % (error.__class__.__name__, error)
                if data_hash:
                    suffix = "The results of the failed execution were uploaded."
                    failure_message = "%s\n%s" % (failure_message, suffix)
                elif not isinstance(error, UsageError):
                    failure_message = "Traceback:\n%s\n%s" % ("".join(traceback.format_tb(tb))[:-1], failure_message)
                metadata.update({"failure_message": failure_message})
                state = State.FAILED
            self.finalize_run(bundle, state, data_hash, metadata)
            print "-- END RUN: %s [%s]" % (bundle, state)
        # Clean up after the run.
        with self.profile("Cleaning up temp directory..."):
            path_util.remove(temp_dir)
Example #41
 def upload_bundle_zip(self, file_uuid, construct_args, worksheet_uuid,
                       follow_symlinks, add_to_worksheet):
     '''
     |file_uuid| specifies a pointer to the temporary file X.
     - If X is a non-zip file, then just upload X as an ordinary file.
     - If X is a zip file containing one file/directory Y representing a bundle, then upload Y.
     - If X is a zip file containing multiple files/directories, then upload X.
     Return the new bundle's uuid.
     Note: delete the file_uuid file and X if needed (these are temporary files).
     '''
     if file_uuid:
         orig_path = self.file_paths[
             file_uuid]  # Note: cheat and look at file_server's data
         precondition(orig_path, 'Unexpected file uuid: %s' % (file_uuid, ))
         if zip_util.is_zip_file(orig_path):
             container_path = tempfile.mkdtemp()  # Make temporary directory
             zip_util.unzip(orig_path, container_path,
                            file_name=None)  # Unzip into a directory
             # If the container path only has one item, then make that the final path
             sub_files = os.listdir(container_path)
             if len(sub_files) == 1:
                 final_path = os.path.join(container_path, sub_files[0])
             else:  # Otherwise, use the container path
                 final_path = container_path
                 container_path = None
         else:
             # Not a zip file!  Just upload it normally as a file.
             final_path = orig_path
             container_path = None  # Don't need to delete
     else:
         final_path = None
     result = self.client.upload_bundle(final_path,
                                        construct_args,
                                        worksheet_uuid,
                                        follow_symlinks,
                                        exclude_patterns=[],
                                        add_to_worksheet=add_to_worksheet)
     if file_uuid:
         if container_path:
             path_util.remove(container_path)  # Remove temporary directory
         self.finalize_file(
             file_uuid, final_path != orig_path)  # Remove temporary file
     return result
Example #42
    def download_target(self, target, follow_symlinks, return_zip=False):
        # Create remote zip file, download to local zip file
        (fd, zip_path) = tempfile.mkstemp(dir=tempfile.gettempdir())
        os.close(fd)
        source_uuid, name = self.open_target_zip(target, follow_symlinks)
        source = RPCFileHandle(source_uuid, self.proxy)
        with open(zip_path, 'wb') as dest:
            with contextlib.closing(source):
                file_util.copy(source, dest, autoflush=False, print_status='Downloading %s on %s to %s' % ('/'.join(target), self.address, zip_path))

        self.finalize_file(source_uuid, True)  # Delete remote zip file
        # Unpack the local zip file
        container_path = tempfile.mkdtemp()
        if return_zip:
            return zip_path, container_path

        result_path = zip_util.unzip(zip_path, container_path, name)
        path_util.remove(zip_path)  # Delete local zip file

        return (result_path, container_path)
Example #43
 def install_dependencies(self, bundle_store, parent_dict, dest_path, copy):
     '''
     Symlink or copy this bundle's dependencies into the directory at dest_path.
     The caller is responsible for cleaning up this directory.
     '''
     precondition(os.path.isabs(dest_path),
                  '%s is a relative path!' % (dest_path, ))
     pairs = self.get_dependency_paths(bundle_store,
                                       parent_dict,
                                       dest_path,
                                       relative_symlinks=not copy)
     for (target, link_path) in pairs:
         # If the dependency already exists, remove it (this happens when we are reinstalling)
         if os.path.exists(link_path):
             path_util.remove(link_path)
         # Either copy (but not follow further symlinks) or symlink.
         if copy:
             path_util.copy(target, link_path, follow_symlinks=False)
         else:
             os.symlink(target, link_path)
Example #44
    def upload_to_bundle_store(self, bundle: Bundle, source: Source, git: bool,
                               unpack: bool):
        """Uploads the given source to the bundle store.
        Given arguments are the same as UploadManager.upload_to_bundle_store().
        Used when uploading from rest server."""
        try:
            # bundle_path = self._bundle_store.get_bundle_location(bundle.uuid)
            is_url, is_fileobj, filename = self._interpret_source(source)
            if is_url:
                assert isinstance(source, str)
                if git:
                    bundle_path = self._update_and_get_bundle_location(
                        bundle, is_directory=True)
                    self.write_git_repo(source, bundle_path)
                else:
                    # If downloading from a URL, convert the source to a file object.
                    is_fileobj = True
                    source = (filename, urlopen_with_retry(source))
            if is_fileobj:
                source_filename, source_fileobj = cast(Tuple[str, IO[bytes]],
                                                       source)
                source_ext = zip_util.get_archive_ext(source_filename)
                if unpack and zip_util.path_is_archive(filename):
                    bundle_path = self._update_and_get_bundle_location(
                        bundle, is_directory=source_ext in ARCHIVE_EXTS_DIR)
                    self.write_fileobj(source_ext,
                                       source_fileobj,
                                       bundle_path,
                                       unpack_archive=True)
                else:
                    bundle_path = self._update_and_get_bundle_location(
                        bundle, is_directory=False)
                    self.write_fileobj(source_ext,
                                       source_fileobj,
                                       bundle_path,
                                       unpack_archive=False)

        except UsageError:
            if FileSystems.exists(bundle_path):
                path_util.remove(bundle_path)
            raise
Example #45
def unpack(source, dest_path):
    """
    Unpack the archive |source| to |dest_path|.
    Note: |source| can be a file handle or a path.
    """
    # Unpack to a temporary location.
    # TODO: guard against zip bombs.  Put a maximum limit and enforce it here.
    # In the future, we probably don't want to be unpacking things all over the place.
    tmp_path = tempfile.mkdtemp('-zip_util.unpack')
    if isinstance(source, basestring):
        source_path = source
        if source_path.endswith('tar.gz') or source_path.endswith('tgz'):
            exitcode = subprocess.call(['tar', 'xfz', source_path, '-C', tmp_path])
        elif source_path.endswith('tar.bz2'):
            exitcode = subprocess.call(['tar', 'xfj', source_path, '-C', tmp_path])
        elif source_path.endswith('zip'):
            exitcode = subprocess.call(['unzip', '-q', source_path, '-d', tmp_path])
        elif source_path.endswith('.gz'):
            with open(os.path.join(tmp_path, os.path.basename(strip_archive_ext(source_path))), 'wb') as f:
                exitcode = subprocess.call(['gunzip', '-q', '-c', source_path], stdout=f)
        else:
            raise UsageError('Not an archive: %s' % source_path)
        if exitcode != 0:
            raise UsageError('Error unpacking %s' % source_path)
    else:
        # File handle, stream the contents!
        source_handle = source
        proc = subprocess.Popen(['tar', 'xfz', '-', '-C', tmp_path], stdin=subprocess.PIPE)
        file_util.copy(source_handle, proc.stdin, print_status='Downloading and unpacking to %s' % tmp_path)
        proc.stdin.close()
        proc.wait()

    # Move files into the right place.
    # If archive only contains one path, then use that.
    files = [f for f in os.listdir(tmp_path) if not ignore_file(f)]
    if len(files) == 1:
        path_util.rename(os.path.join(tmp_path, files[0]), dest_path)
        path_util.remove(tmp_path)
    else:
        path_util.rename(tmp_path, dest_path)
Example #46
    def upload_bundle(self, path, info, worksheet_uuid, follow_symlinks,
                      exclude_patterns, add_to_worksheet):
        # URLs can be directly passed to the local client.
        if path and not isinstance(path, list) and path_util.path_is_url(path):
            return self.upload_bundle_url(path, info, worksheet_uuid,
                                          follow_symlinks, exclude_patterns)

        # First, zip path up (temporary local zip file).
        if path:
            name = info['metadata']['name']
            zip_path = zip_util.zip(path,
                                    follow_symlinks=follow_symlinks,
                                    exclude_patterns=exclude_patterns,
                                    file_name=name)
            # Copy it up to the server (temporary remote zip file)
            with open(zip_path, 'rb') as source:
                remote_file_uuid = self.open_temp_file()
                dest = RPCFileHandle(remote_file_uuid, self.proxy)
                # FileServer does not expose an API for forcibly flushing writes, so
                # we rely on closing the file to flush it.
                file_util.copy(source,
                               dest,
                               autoflush=False,
                               print_status='Uploading %s%s to %s' %
                               (zip_path, ' (' + info['uuid'] +
                                ')' if 'uuid' in info else '', self.address))
                dest.close()
        else:
            remote_file_uuid = None
            zip_path = None

        # Finally, install the zip file (this will be in charge of deleting that zip file).
        result = self.upload_bundle_zip(remote_file_uuid, info, worksheet_uuid,
                                        follow_symlinks, add_to_worksheet)

        if zip_path:
            path_util.remove(zip_path)  # Remove local zip
        return result
Example #47
    def rm_partition(self, partition):
        """
        Deletes the given disk from the bundle store, and if it is not the last partition, it redistributes the bundles
        from that partition across the remaining partitions.
        """
        # Transfer all of the files to their correct locations.

        if self.__get_num_partitions() == 1:
            """
            Prevent foot-shooting
            """
            print >> sys.stderr, "Error, cannot remove last partition. If you really wish to delete CodaLab, please run the following command:"
            print >> sys.stderr, "      rm -rf %s" % self.codalab_home
            return

        relocations = dict()
        partition_abs_path = os.path.join(self.partitions, partition)
        old_mdata = os.path.join(partition_abs_path, MultiDiskBundleStore.DATA_SUBDIRECTORY)
        old_mtemp = os.path.join(partition_abs_path, MultiDiskBundleStore.TEMP_SUBDIRECTORY)

        try:
            print partition_abs_path
            path_util.check_isvalid(partition_abs_path, 'rm-partition')
        except:
            print >> sys.stderr, "Partition with name '%s' does not exist. Run `cl ls-partitions` to see a list of mounted partitions." % partition
            sys.exit(1)

        # Reset the ring to distribute across remaining partitions
        self.ring.remove_node(partition)
        bundles_to_move = reduce(lambda dirs, files: dirs + files, path_util.ls(old_mdata))

        for bundle in bundles_to_move:
            new_partition = self.ring.get_node(bundle)
            relocations[bundle] = os.path.join(self.partitions, new_partition)

        # Copy all bundles off of the old partition to temp directories on the new partition
        for bundle, partition in relocations.iteritems():
            # temporary directory on the partition
            temp_dir = os.path.join(partition, MultiDiskBundleStore.TEMP_SUBDIRECTORY)
            from_path = os.path.join(old_mdata, bundle)
            to_path = os.path.join(temp_dir, 'stage-%s' % bundle)
            path_util.copy(from_path, to_path)

        # Now that each bundle is on the proper partition, move each from the staging area to the
        # production mdata/ subdirectory on its partition.
        for bundle, partition in relocations.iteritems():
            temp_dir = os.path.join(partition, MultiDiskBundleStore.TEMP_SUBDIRECTORY)
            from_path = os.path.join(temp_dir, 'stage-%s' % bundle)
            to_path = os.path.join(partition, MultiDiskBundleStore.DATA_SUBDIRECTORY, bundle)
            path_util.rename(from_path, to_path)

        # Remove data from partition and unlink from CodaLab
        print >> sys.stderr, "Cleaning bundles off of partition..."
        path_util.remove(old_mdata)
        path_util.remove(old_mtemp)
        print >> sys.stderr, "Unlinking partition %s from CodaLab deployment..." % partition
        path_util.remove(partition_abs_path)
        print >> sys.stderr, "Partition removed successfully from bundle store pool"
Example #48
def zip(path, follow_symlinks, exclude_patterns, file_name):
    '''
    Take a path to a file or directory |path| and return the path to a zip archive
    containing its contents.  |file_name| is what the zip archive contains.
    '''
    if isinstance(path, list):
        for p in path:
            absolute_path = path_util.normalize(p)
            path_util.check_isvalid(absolute_path, 'zip_directory')
    else:
        absolute_path = path_util.normalize(path)
        path_util.check_isvalid(absolute_path, 'zip_directory')

    # Recursively copy the directory into a temp directory.
    temp_path = tempfile.mkdtemp()
    temp_subpath = os.path.join(temp_path, file_name)

    print_util.open_line('Copying %s to %s' % (path, temp_subpath))
    if isinstance(path, list):
        os.mkdir(temp_subpath)
        for p in path:
            absolute_path = path_util.normalize(p)
            path_util.copy(absolute_path, os.path.join(temp_subpath, os.path.basename(p)), follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
    else:
        absolute_path = path_util.normalize(path)
        path_util.copy(absolute_path, temp_subpath, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
    print_util.clear_line()

    zip_path = temp_path + '.zip'
    opts = '-qr'
    if not follow_symlinks: opts += ' --symlinks'
    print_util.open_line('Zipping to %s' % zip_path)
    if os.system("cd %s && zip %s %s %s" % (temp_path, opts, zip_path, file_name)) != 0:
        raise UsageError('zip failed')

    path_util.remove(temp_path)
    return zip_path
Example #49
    def download_target(self, target, follow_symlinks, return_zip=False):
        # Create remote zip file, download to local zip file
        (fd, zip_path) = tempfile.mkstemp(dir=tempfile.gettempdir())
        os.close(fd)
        source_uuid, name = self.open_target_zip(target, follow_symlinks)
        source = RPCFileHandle(source_uuid, self.proxy)
        with open(zip_path, 'wb') as dest:
            with contextlib.closing(source):
                file_util.copy(source,
                               dest,
                               autoflush=False,
                               print_status='Downloading %s on %s to %s' %
                               ('/'.join(target), self.address, zip_path))

        self.finalize_file(source_uuid, True)  # Delete remote zip file
        # Unpack the local zip file
        container_path = tempfile.mkdtemp()
        if return_zip:
            return zip_path, container_path

        result_path = zip_util.unzip(zip_path, container_path, name)
        path_util.remove(zip_path)  # Delete local zip file

        return (result_path, container_path)
Example #50
class BundleStore(object):
    DATA_SUBDIRECTORY = 'data'
    TEMP_SUBDIRECTORY = 'temp'

    # The amount of time a folder can live in the data and temp
    # directories before it is garbage collected by full_cleanup.
    # Note: this is not used right now since we clear out the bundle store
    # immediately.
    DATA_CLEANUP_TIME = 60
    TEMP_CLEANUP_TIME = 60 * 60

    def __init__(self, codalab_home, direct_upload_paths):
        '''
        codalab_home: data/ is where all the bundles are actually stored, temp/ is temporary
        direct_upload_paths: we can accept file://... uploads from these paths.
        '''
        self.codalab_home = path_util.normalize(codalab_home)
        self.direct_upload_paths = direct_upload_paths
        self.data = os.path.join(self.codalab_home, self.DATA_SUBDIRECTORY)
        self.temp = os.path.join(self.codalab_home, self.TEMP_SUBDIRECTORY)
        self.make_directories()

    def _reset(self):
        '''
        Delete all stored bundles and then recreate the root directories.
        '''
        # Do not run this function in production!
        path_util.remove(self.data)
        path_util.remove(self.temp)
        self.make_directories()

    def make_directories(self):
        '''
        Create the data, and temp directories for this BundleStore.
        '''
        for path in (self.data, self.temp):
            path_util.make_directory(path)

    def get_location(self, data_hash, relative=False):
        '''
        Returns the on-disk location of the bundle with the given data hash.
        '''
        if relative:
            return data_hash
        return os.path.join(self.data, data_hash)

    def get_temp_location(self, identifier):
        '''
        Returns the on-disk location of the temporary bundle directory.
        '''
        return os.path.join(self.temp, identifier)

    def make_temp_location(self, identifier):
        '''
        Creates a directory with the given name under TEMP_SUBDIRECTORY.
        '''
        path_util.make_directory(self.get_temp_location(identifier))

    def upload(self, path, follow_symlinks, exclude_patterns):
        '''
        Copy the contents of the directory at |path| into the data subdirectory,
        in a subfolder named by a hash of the contents of the new data directory.
        If |path| is in a temporary directory, then we just move it.

        Return a (data_hash, metadata) pair, where the metadata is a dict mapping
        keys to precomputed statistics about the new data directory.
        '''
        # Create temporary directory as a staging area.
        # If |path| is already temporary, then we use that directly
        # (with the understanding that |path| will be moved)
        if not isinstance(path, list) and os.path.realpath(path).startswith(
                os.path.realpath(self.temp)):
            temp_path = path
        else:
            temp_path = os.path.join(self.temp, uuid.uuid4().hex)

        if not isinstance(path, list) and path_util.path_is_url(path):
            # Be careful here: if the URL points to a local file (file://),
            # make sure it comes from a path we are allowed to upload from.
            if path.startswith('file://'):
                path_suffix = path[7:]
                if os.path.islink(path_suffix):
                    raise UsageError('Not allowed to upload symlink %s' %
                                     path_suffix)
                if not any(
                        path_suffix.startswith(f)
                        for f in self.direct_upload_paths):
                    raise UsageError(
                        'Not allowed to upload %s (only %s allowed)' %
                        (path_suffix, self.direct_upload_paths))

            # Download |path| if it is a URL.
            print >> sys.stderr, 'BundleStore.upload: downloading %s to %s' % (
                path, temp_path)
            file_util.download_url(path, temp_path, print_status=True)
        elif path != temp_path:
            # Copy |path| into the temp_path.
            if isinstance(path, list):
                absolute_path = [path_util.normalize(p) for p in path]
                for p in absolute_path:
                    path_util.check_isvalid(p, 'upload')
            else:
                absolute_path = path_util.normalize(path)
                path_util.check_isvalid(absolute_path, 'upload')

            # Recursively copy the directory into a new BundleStore temp directory.
            print_util.open_line('BundleStore.upload: copying %s to %s' %
                                 (absolute_path, temp_path))
            path_util.copy(absolute_path,
                           temp_path,
                           follow_symlinks=follow_symlinks,
                           exclude_patterns=exclude_patterns)
            print_util.clear_line()

        # Multiplex between uploading a directory and uploading a file here.
        # All other path_util calls will use these lists of directories and files.
        if os.path.isdir(temp_path):
            dirs_and_files = path_util.recursive_ls(temp_path)
        else:
            dirs_and_files = ([], [temp_path])

        # Hash the contents of the temporary directory, and then if there is no
        # data with this hash value, move this directory into the data directory.
        print_util.open_line('BundleStore.upload: hashing %s' % temp_path)
        data_hash = '0x%s' % (path_util.hash_directory(temp_path,
                                                       dirs_and_files), )
        print_util.clear_line()
        print_util.open_line('BundleStore.upload: computing size of %s' %
                             temp_path)
        data_size = path_util.get_size(temp_path, dirs_and_files)
        print_util.clear_line()
        final_path = os.path.join(self.data, data_hash)
        final_path_exists = False
        try:
            # If data_hash already exists, then we don't need to move it over.
            os.utime(final_path, None)
            final_path_exists = True
        except OSError as e:
            if e.errno == errno.ENOENT:
                print >> sys.stderr, 'BundleStore.upload: moving %s to %s' % (
                    temp_path, final_path)
                path_util.rename(temp_path, final_path)
            else:
                raise
        if final_path_exists:
            path_util.remove(temp_path)

        # After this operation there should always be a directory at the final path.
        assert os.path.exists(final_path), 'Upload to %s failed!' % (final_path,)
        return (data_hash, {'data_size': data_size})
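A minimal usage sketch for the BundleStore above; the home directory, upload path, and exclude pattern are illustrative:

# Hypothetical usage of BundleStore.upload (paths are made up).
store = BundleStore('/home/alice/.codalab',          # codalab_home
                    direct_upload_paths=['/data/'])  # file:// uploads allowed from here
data_hash, metadata = store.upload('/tmp/run-outputs',
                                   follow_symlinks=False,
                                   exclude_patterns=['*.tmp'])
print 'stored at %s (%d bytes)' % (store.get_location(data_hash), metadata['data_size'])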
Ejemplo n.º 51
0
 def tearDownClass(cls):
     cls.model.engine.close()
     path_util.remove(cls.test_root)
Ejemplo n.º 52
0
    def upload(self, sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources):
        '''
        |sources|: specifies the locations of the contents to upload.  Each element is either a URL or a local path.
        |follow_symlinks|: for local path(s), whether to follow (resolve) symlinks
        |exclude_patterns|: for local path(s), don't upload these patterns (e.g., *.o)
        |git|: for URL, whether |source| is a git repo to clone.
        |unpack|: for each source in |sources|, whether to unpack it if it's an archive.
        |remove_sources|: whether to remove |sources| once the upload has completed.

        If |sources| contains one source, then the bundle contents will be that source.
        Otherwise, the bundle contents will be a directory with each of the sources.
        Exceptions:
        - If |git|, then each source is replaced with the result of running 'git clone |source|'
        - If |unpack| is True or a source is an archive (zip, tar.gz, etc.), then unpack the source.

        Install the contents of the directory at |source| into
        DATA_SUBDIRECTORY in a subdirectory named by a hash of the contents.

        Return a (data_hash, metadata) pair, where the metadata is a dict mapping
        keys to precomputed statistics about the new data directory.
        '''
        to_delete = []

        # Create temporary directory as a staging area and put everything there.
        temp_path = tempfile.mkdtemp('-bundle_store_upload')
        temp_subpaths = []
        for source in sources:
            # Where to save |source| to (might change this value if we unpack).
            temp_subpath = os.path.join(temp_path, os.path.basename(source))
            if remove_sources:
                to_delete.append(source)
            source_unpack = unpack and zip_util.path_is_archive(source)

            if path_util.path_is_url(source):
                # Download the URL.
                print_util.open_line('BundleStore.upload: downloading %s to %s' % (source, temp_path))
                if git:
                    file_util.git_clone(source, temp_subpath)
                else:
                    file_util.download_url(source, temp_subpath, print_status=True)
                    if source_unpack:
                        zip_util.unpack(temp_subpath, zip_util.strip_archive_ext(temp_subpath))
                        path_util.remove(temp_subpath)
                        temp_subpath = zip_util.strip_archive_ext(temp_subpath)
                print_util.clear_line()
            else:
                # Copy the local path.
                source_path = path_util.normalize(source)
                path_util.check_isvalid(source_path, 'upload')

                # Recursively copy the directory into a new BundleStore temp directory.
                print_util.open_line('BundleStore.upload: %s => %s' % (source_path, temp_subpath))
                if source_unpack:
                    zip_util.unpack(source_path, zip_util.strip_archive_ext(temp_subpath))
                    temp_subpath = zip_util.strip_archive_ext(temp_subpath)
                else:
                    if remove_sources:
                        path_util.rename(source_path, temp_subpath)
                    else:
                        path_util.copy(source_path, temp_subpath, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
                print_util.clear_line()

            temp_subpaths.append(temp_subpath)

        # If exactly one source, then upload that directly.
        if len(temp_subpaths) == 1:
            to_delete.append(temp_path)
            temp_path = temp_subpaths[0]

        # Multiplex between uploading a directory and uploading a file here.
        # All other path_util calls will use these lists of directories and files.
        if os.path.isdir(temp_path):
            dirs_and_files = path_util.recursive_ls(temp_path)
        else:
            dirs_and_files = ([], [temp_path])

        # Hash the contents of the temporary directory, and then if there is no
        # data with this hash value, move this directory into the data directory.
        print_util.open_line('BundleStore.upload: hashing %s' % temp_path)
        data_hash = '0x%s' % (path_util.hash_directory(temp_path, dirs_and_files),)
        print_util.clear_line()
        print_util.open_line('BundleStore.upload: computing size of %s' % temp_path)
        data_size = path_util.get_size(temp_path, dirs_and_files)
        print_util.clear_line()
        final_path = os.path.join(self.data, data_hash)
        if os.path.exists(final_path):
            # Already exists, just delete it
            path_util.remove(temp_path)
        else:
            print >>sys.stderr, 'BundleStore.upload: moving %s to %s' % (temp_path, final_path)
            path_util.rename(temp_path, final_path)

        # Delete paths.
        for path in to_delete:
            if os.path.exists(path):
                path_util.remove(path)

        # After this operation there should always be a directory at the final path.
        assert os.path.lexists(final_path), 'Upload to %s failed!' % (final_path,)
        return (data_hash, {'data_size': data_size})
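A hedged sketch of calling this multi-source upload; `store` stands in for an instance of the class this method belongs to, and the source values are illustrative:

# Hypothetical call mixing a URL and a local directory (both values are made up).
sources = ['https://example.com/datasets/images.tar.gz', '/home/alice/code']
data_hash, metadata = store.upload(sources,
                                   follow_symlinks=False,
                                   exclude_patterns=['.git'],
                                   git=False,
                                   unpack=True,           # unpack the .tar.gz source
                                   remove_sources=False)  # keep the local originals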
Ejemplo n.º 53
0
    def add_partition(self, target, new_partition_name):
        """
        MultiDiskBundleStore-specific method. Adds a new partition to the bundle store. |target| is the directory the
        user has configured as the mountpoint for the desired partition; a symlink to it is created in the bundle
        store's partition pool.

        First, all bundles that are to be relocated onto the new partition are copied to a temporary location, so the
        operation is resilient against failures. After the copy completes, the bundles are moved to the new partition,
        and finally the original copies are deleted from their old locations.
        """
        target = os.path.abspath(target)
        new_partition_location = os.path.join(self.partitions,
                                              new_partition_name)

        mtemp = os.path.join(target, MultiDiskBundleStore.TEMP_SUBDIRECTORY)

        try:
            path_util.make_directory(mtemp)
        except:
            print >> sys.stderr, "Could not make directory %s on partition %s, aborting" % (
                mtemp, target)
            sys.exit(1)

        self.ring.add_node(new_partition_name)  # Add the node to the partition locations
        delete_on_success = []  # Paths to bundles that will be deleted after the copy finishes successfully

        print >> sys.stderr, "Marking bundles for placement on new partition %s (might take a while)" % new_partition_name
        # For each bundle in the bundle store, check whether it hashes to the new partition; if so, move it over.
        partitions, _ = path_util.ls(self.partitions)
        for partition in partitions:
            partition_abs_path = os.path.join(
                self.partitions, partition,
                MultiDiskBundleStore.DATA_SUBDIRECTORY)
            bundles = reduce(lambda dirs, files: dirs + files,
                             path_util.ls(partition_abs_path))
            for bundle in bundles:
                correct_partition = self.ring.get_node(bundle)
                if correct_partition != partition:
                    # Reposition the node to the correct partition
                    from_path = os.path.join(
                        self.partitions, partition,
                        MultiDiskBundleStore.DATA_SUBDIRECTORY, bundle)
                    to_path = os.path.join(mtemp, bundle)
                    print >> sys.stderr, "copying %s to %s" % (from_path,
                                                               to_path)
                    path_util.copy(from_path, to_path)
                    delete_on_success += [from_path]

        print >> sys.stderr, "Adding new partition as %s..." % new_partition_location
        path_util.soft_link(target, new_partition_location)

        # Atomically move the temp location to the new partition's mdata
        new_mdata = os.path.join(new_partition_location,
                                 MultiDiskBundleStore.DATA_SUBDIRECTORY)
        new_mtemp = os.path.join(new_partition_location,
                                 MultiDiskBundleStore.TEMP_SUBDIRECTORY)
        path_util.rename(new_mtemp, new_mdata)
        path_util.make_directory(new_mtemp)

        # Go through and purge all of the originals at this time
        print >> sys.stderr, "Cleaning up drives..."
        for to_delete in delete_on_success:
            path_util.remove(to_delete)

        print >> sys.stderr, "Successfully added partition '%s' to the pool." % new_partition_name
Ejemplo n.º 54
0
 def _delete_path(loc):
     cmd = 'rm -r \'%s\'' % loc
     print(cmd)
     if force:
         path_util.remove(loc)
Ejemplo n.º 55
0
    def rm_partition(self, partition):
        """
        Deletes the given partition from the bundle store and, if it is not the last partition, redistributes its
        bundles across the remaining partitions.
        """
        # Transfer all of the files to their correct locations.

        if self.__get_num_partitions() == 1:
            """
            Prevent foot-shooting
            """
            print >> sys.stderr, "Error, cannot remove last partition. If you really wish to delete CodaLab, please run the following command:"
            print >> sys.stderr, "      rm -rf %s" % self.codalab_home
            return

        relocations = dict()
        partition_abs_path = os.path.join(self.partitions, partition)
        old_mdata = os.path.join(partition_abs_path,
                                 MultiDiskBundleStore.DATA_SUBDIRECTORY)
        old_mtemp = os.path.join(partition_abs_path,
                                 MultiDiskBundleStore.TEMP_SUBDIRECTORY)

        try:
            print partition_abs_path
            path_util.check_isvalid(partition_abs_path, 'rm-partition')
        except:
            print >> sys.stderr, "Partition with name '%s' does not exist. Run `cl ls-partitions` to see a list of mounted partitions." % partition
            sys.exit(1)

        # Reset the ring to distribute across remaining partitions
        self.ring.remove_node(partition)
        bundles_to_move = reduce(lambda dirs, files: dirs + files,
                                 path_util.ls(old_mdata))

        for bundle in bundles_to_move:
            new_partition = self.ring.get_node(bundle)
            relocations[bundle] = os.path.join(self.partitions, new_partition)

        # Copy all bundles off of the old partition to temp directories on the new partition
        for bundle, partition in relocations.iteritems():
            # temporary directory on the partition
            temp_dir = os.path.join(partition,
                                    MultiDiskBundleStore.TEMP_SUBDIRECTORY)
            from_path = os.path.join(old_mdata, bundle)
            to_path = os.path.join(temp_dir, 'stage-%s' % bundle)
            path_util.copy(from_path, to_path)

        # Now that each bundle is on the proper partition, move each from the staging area to the
        # production mdata/ subdirectory on its partition.
        for bundle, partition in relocations.iteritems():
            temp_dir = os.path.join(partition,
                                    MultiDiskBundleStore.TEMP_SUBDIRECTORY)
            from_path = os.path.join(temp_dir, 'stage-%s' % bundle)
            to_path = os.path.join(partition,
                                   MultiDiskBundleStore.DATA_SUBDIRECTORY,
                                   bundle)
            path_util.rename(from_path, to_path)

        # Remove data from partition and unlink from CodaLab
        print >> sys.stderr, "Cleaning bundles off of partition..."
        path_util.remove(old_mdata)
        path_util.remove(old_mtemp)
        print >> sys.stderr, "Unlinking partition %s from CodaLab deployment..." % partition
        path_util.remove(partition_abs_path)
        print >> sys.stderr, "Partition removed successfully from bundle store pool"
Ejemplo n.º 56
0
    def upload_to_bundle_store(
        self,
        bundle,
        sources,
        follow_symlinks,
        exclude_patterns,
        remove_sources,
        git,
        unpack,
        simplify_archives,
    ):
        """
        Uploads contents for the given bundle to the bundle store.

        |sources|: specifies the locations of the contents to upload. Each element is
                   either a URL, a local path or a tuple (filename, binary file-like object).
        |follow_symlinks|: for local path(s), whether to follow (resolve) symlinks,
                           but only if remove_sources is False.
        |exclude_patterns|: for local path(s), don't upload these patterns (e.g., *.o),
                            but only if remove_sources is False.
        |remove_sources|: for local path(s), whether |sources| should be removed
        |git|: for URLs, whether |source| is a git repo to clone.
        |unpack|: for each source in |sources|, whether to unpack it if it's an archive.
        |simplify_archives|: whether to simplify unpacked archives so that if they
                             contain a single file, the final path is just that file,
                             not a directory containing that file.

        If |sources| contains one source, then the bundle contents will be that source.
        Otherwise, the bundle contents will be a directory with each of the sources.
        Exceptions:
        - If |git|, then each source is replaced with the result of running 'git clone |source|'
        - If |unpack| is True or a source is an archive (zip, tar.gz, etc.), then unpack the source.
        """
        exclude_patterns = (
            self._default_exclude_patterns + exclude_patterns
            if exclude_patterns
            else self._default_exclude_patterns
        )
        bundle_link_url = getattr(bundle.metadata, "link_url", None)
        if bundle_link_url:
            # Don't do anything for linked bundles.
            return
        bundle_path = self._bundle_store.get_bundle_location(bundle.uuid)
        try:
            path_util.make_directory(bundle_path)
            # Note that for uploads with a single source, the directory
            # structure is simplified at the end.
            for source in sources:
                is_url, is_local_path, is_fileobj, filename = self._interpret_source(
                    source)
                source_output_path = os.path.join(bundle_path, filename)
                if is_url:
                    if git:
                        source_output_path = file_util.strip_git_ext(
                            source_output_path)
                        file_util.git_clone(source, source_output_path)
                    else:
                        file_util.download_url(source, source_output_path)
                        if unpack and self._can_unpack_file(
                                source_output_path):
                            self._unpack_file(
                                source_output_path,
                                zip_util.strip_archive_ext(source_output_path),
                                remove_source=True,
                                simplify_archive=simplify_archives,
                            )
                elif is_local_path:
                    source_path = path_util.normalize(source)
                    path_util.check_isvalid(source_path, 'upload')

                    if unpack and self._can_unpack_file(source_path):
                        self._unpack_file(
                            source_path,
                            zip_util.strip_archive_ext(source_output_path),
                            remove_source=remove_sources,
                            simplify_archive=simplify_archives,
                        )
                    elif remove_sources:
                        path_util.rename(source_path, source_output_path)
                    else:
                        path_util.copy(
                            source_path,
                            source_output_path,
                            follow_symlinks=follow_symlinks,
                            exclude_patterns=exclude_patterns,
                        )
                elif is_fileobj:
                    if unpack and zip_util.path_is_archive(filename):
                        self._unpack_fileobj(
                            source[0],
                            source[1],
                            zip_util.strip_archive_ext(source_output_path),
                            simplify_archive=simplify_archives,
                        )
                    else:
                        with open(source_output_path, 'wb') as out:
                            shutil.copyfileobj(source[1], out)

            if len(sources) == 1:
                self._simplify_directory(bundle_path)
        except:
            if os.path.exists(bundle_path):
                path_util.remove(bundle_path)
            raise
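A hedged sketch of the three source shapes this method accepts (URL, local path, and (filename, file-like object) tuple). Here `manager` and `bundle` stand in for the object owning this method and an existing bundle record, and all values are illustrative:

# Hypothetical call showing the three accepted source shapes (values are made up).
import io

sources = [
    'https://example.com/weights.tar.gz',       # URL: downloaded, then unpacked
    '/home/alice/configs',                      # local path: copied (or moved if remove_sources)
    ('notes.txt', io.BytesIO(b'hello world')),  # (filename, binary file-like object)
]
manager.upload_to_bundle_store(bundle, sources,
                               follow_symlinks=False,
                               exclude_patterns=[],
                               remove_sources=False,
                               git=False,
                               unpack=True,
                               simplify_archives=True)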