Example #1
0
class TemporaryCredentials(object):
    TEMPLATE = u"""
    {
      "oauth2": {
        "client_id":"c169e6d6-d2de-4c56-ac8c-8b7671097e0c",
        "client_secret":"secret",
        "server":"http://localhost:%s"
      },
      "ostree":{
        "server":"http://localhost:%s"
      }
    }
    """

    def __init__(self, port):
        # Render the credentials template against the given port and keep
        # the result in a named temp file so callers get a real path.
        tmpfile = NamedTemporaryFile(mode='w+t')
        tmpfile.write(self.TEMPLATE % (port, port))
        tmpfile.flush()
        self._tmpfile = tmpfile

    def path(self):
        """Filesystem path of the rendered credentials file."""
        return self._tmpfile.name

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Delegate cleanup (close + unlink) to the temporary file.
        self._tmpfile.__exit__(exc_type, exc_val, exc_tb)
Example #2
0
def TempFile(**kw):
    """NamedTemporaryFile wrapper that doesn't fail if you (re)move the file
    """
    # Generator-based context manager: yields the open NamedTemporaryFile
    # to the caller.  NOTE(review): presumably decorated with
    # @contextmanager at the definition site -- confirm in the full file.
    f = NamedTemporaryFile(**kw)
    try:
        yield f
    finally:
        # Cleanup may legitimately find the file already moved/removed;
        # ignore_missing_file() suppresses that error.  NOTE(review):
        # assumed to suppress only missing-file errors -- verify at its
        # definition.
        with ignore_missing_file():
            f.__exit__(None, None, None)
Example #3
0
class ZipGenerator(object):
    """
    Generates the zip. Acts as context manager to ensure that all temporary
    files are deleted after usage.
    """

    def __init__(self):
        self.empty = True
        # BUG FIX: initialize explicitly so generate() raises the intended
        # StandardError instead of AttributeError when the generator is
        # used without entering the context manager.
        self.tmp_file = None
        self.zip_file = None

    def __enter__(self):
        # Zip data is written to a temp file that is deleted on __exit__.
        self.tmp_file = NamedTemporaryFile(prefix="plone_zipexport_")
        self.tmp_file.__enter__()
        self.zip_file = ZipFile(self.tmp_file.name, "w", allowZip64=True)
        self.zip_file.__enter__()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.zip_file.__exit__(exc_type, exc_value, traceback)
        self.tmp_file.__exit__(exc_type, exc_value, traceback)

    def add_file(self, file_path, file_pointer):
        """Add the contents of ``file_pointer`` under ``file_path``."""
        # paths in zipfile do not have a / at the root
        file_path = file_path.strip('/')

        file_path = self.generate_unique_filepath(file_path)

        try:
            self.zip_file.writefile(file_pointer, file_path)
        except RuntimeError:
            raise StandardError("ZipFile already generated/closed. "
                                "Please add all files before generating.")
        self.empty = False

    def generate_unique_filepath(self, file_path):
        """Return ``file_path``, suffixed with ' (N)' if already present."""
        if file_path not in self.zip_file.namelist():
            return file_path

        path, name = os.path.split(file_path)
        name, ext = os.path.splitext(name)

        for i in xrange(2, sys.maxint):
            new_filename = os.path.join(path, '%s (%d)%s' % (name, i, ext))
            if new_filename not in self.zip_file.namelist():
                return new_filename

    @property
    def is_empty(self):
        # True until the first file has been added.
        return self.empty

    def generate(self):
        """Close the zip and return the backing temporary file."""
        if self.tmp_file is None:
            raise StandardError(
                "Please use ZipGenerator as a context manager.")
        self.zip_file.close()
        return self.tmp_file
Example #4
0
class _NamedTemporaryFileWithContent(object):
    def __init__(self, content):
        self._file = NamedTemporaryFile()
        self._file.write(content)
        self._file.flush()
        self._file.file.seek(0)

    def __enter__(self):
        return self._file.__enter__()

    def __exit__(self, *args):
        self._file.__exit__(*args)
Example #5
0
class SingleTemporaryFile:
    """
    A NamedTemporaryFile wrapper created inside the temp subdirectory named
    by joining the *args strings, configured via **kwargs.  Only the most
    recent temporary file survives: the directory is swept on each exit.
    """

    def __init__(self, *args, **kwargs):
        self.tempdir = get_single_temporary_file_directory(*args)
        os.makedirs(self.tempdir, exist_ok=True)

        # 'dir' and 'delete' are owned by this wrapper; drop caller values.
        kwargs.pop("dir", None)
        kwargs.pop("delete", None)

        self.named_temporary_file = NamedTemporaryFile(
            dir=self.tempdir, delete=False, **kwargs)

    def __enter__(self):
        return self.named_temporary_file.__enter__()

    def __exit__(self, exc_type, exc_value, exc_traceback):
        # Remember the current path before delegating, then sweep every
        # sibling so only the newest temporary file remains on disk.
        current = self.named_temporary_file.name
        try:
            return self.named_temporary_file.__exit__(
                exc_type, exc_value, exc_traceback)
        finally:
            for entry in os.listdir(self.tempdir):
                candidate = os.path.join(self.tempdir, entry)
                if candidate != current:
                    os.remove(candidate)
Example #6
0
class AtomicFileWriter(object):
    """Write to a hidden temp file in the target directory, then rename it
    into place on clean exit (or unlink it on failure)."""

    def __init__(self, path, tmp_prefix=None, encoding='utf-8'):
        self.name = path
        output_dir, base = os.path.split(path)
        # Default the temp prefix to "<basename>." so the temp file sits
        # next to, and is identifiable with, its destination.
        prefix = base + '.' if tmp_prefix is None else tmp_prefix
        self.tmpf = NamedTemporaryFile(dir=output_dir, prefix=prefix,
                                       mode='w', encoding=encoding,
                                       delete=False)

    def __enter__(self):
        self.tmpf.__enter__()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        tmp_name = self.tmpf.name
        result = self.tmpf.__exit__(exc_type, exc_value, exc_traceback)
        # Keep the file only when the context exited cleanly (or the inner
        # __exit__ swallowed the exception); otherwise discard it.
        if result or exc_type is None:
            os.rename(tmp_name, self.name)
        else:
            os.unlink(tmp_name)
        return result

    def write(self, data):
        return self.tmpf.write(data)
Example #7
0
class working_file(object):
    """A context manager for managing a temporary file that will be moved
    to a non-temporary location if no exceptions are raised in the context.

    Parameters
    ----------
    final_path : str
        The location to move the file when committing.
    *args, **kwargs
        Forwarded to NamedTemporaryFile.

    Notes
    -----
    The file is moved on __exit__ if there are no exceptions.
    ``working_file`` uses :func:`shutil.move` to move the actual files,
    meaning it has as strong of guarantees as :func:`shutil.move`.
    """

    def __init__(self, final_path, *args, **kwargs):
        # delete=False: the file must survive close() so it can be moved.
        self._tmpfile = NamedTemporaryFile(delete=False, *args, **kwargs)
        self._final_path = final_path

    @property
    def path(self):
        """Alias for ``name`` to be consistent with
        :class:`~zipline.utils.cache.working_dir`.
        """
        return self._tmpfile.name

    def _commit(self):
        """Sync the temporary file to the final path.
        """
        self._tmpfile.close()
        # BUG FIX: was ``move(self._name, ...)``; neither this wrapper nor
        # the temp-file wrapper has a ``_name`` attribute, so every commit
        # raised AttributeError.  Use the temp file's real path.
        move(self._tmpfile.name, self._final_path)

    def __getattr__(self, attr):
        # Delegate everything else (write, flush, name, ...) to the file.
        return getattr(self._tmpfile, attr)

    def __enter__(self):
        self._tmpfile.__enter__()
        return self

    def __exit__(self, *exc_info):
        self._tmpfile.__exit__(*exc_info)
        # Commit only on a clean exit; on error the temp file is abandoned.
        if exc_info[0] is None:
            self._commit()
Example #8
0
class working_file(object):
    """A context manager for managing a temporary file that will be moved
    to a non-temporary location if no exceptions are raised in the context.

    Parameters
    ----------
    final_path : str
        The location to move the file when committing.
    *args, **kwargs
        Forwarded to NamedTemporaryFile.

    Notes
    -----
    The file is moved on __exit__ if there are no exceptions.
    ``working_file`` uses :func:`shutil.move` to move the actual files,
    meaning it has as strong of guarantees as :func:`shutil.move`.
    """
    def __init__(self, final_path, *args, **kwargs):
        # delete=False: the file must survive close() so it can be moved.
        self._tmpfile = NamedTemporaryFile(delete=False, *args, **kwargs)
        self._final_path = final_path

    @property
    def path(self):
        """Alias for ``name`` to be consistent with
        :class:`~zipline.utils.cache.working_dir`.
        """
        return self._tmpfile.name

    def _commit(self):
        """Sync the temporary file to the final path.
        """
        self._tmpfile.close()
        # BUG FIX: was ``move(self._name, ...)``; neither this wrapper nor
        # the temp-file wrapper has a ``_name`` attribute, so every commit
        # raised AttributeError.  Use the temp file's real path.
        move(self._tmpfile.name, self._final_path)

    def __getattr__(self, attr):
        # Delegate everything else (write, flush, name, ...) to the file.
        return getattr(self._tmpfile, attr)

    def __enter__(self):
        self._tmpfile.__enter__()
        return self

    def __exit__(self, *exc_info):
        self._tmpfile.__exit__(*exc_info)
        # Commit only on a clean exit; on error the temp file is abandoned.
        if exc_info[0] is None:
            self._commit()
Example #9
0
class TempFilePool:
    """
    Create a temporary file in the temp "pool" that can be closed and reopened without going away. The entire pool of
    temporary files will be cleared when the process exits.

    Example usage:

    >>> with TempFilePool() as f:
    >>>    name = str(f)
    >>> with h5py.File(name, 'w') as hdf_write:
    >>>    hdf_write.create_dataset(...)
    >>> hdf_read = h5py.File(name, 'r')

    When the resulting tempfile is meant to remain open (i.e. not in a file-open context), then
    use "register_to_close" to attempt closing before deleting

    >>> f.register_to_close(hdf_read)

    """

    pool_dir = os.path.join(os.path.abspath(os.path.curdir), _rand_temp_path)

    def __init__(self, *args, **kwargs):
        pool = TempFilePool.pool_dir
        # Pool files are removed at process exit, never on close.
        kwargs['dir'] = pool
        kwargs['delete'] = False
        if not os.path.exists(pool):
            os.makedirs(pool)
            readme_path = os.path.join(pool, 'README.txt')
            with open(readme_path, 'w') as fw:
                fw.write('This directory contains temporary files for memory mapped arrays. '
                         'Under normal circumstances, this will be deleted when the process ends. '
                         'If you are reading this after a process has stopped, please delete!\n')
        self.tf = NamedTemporaryFile(*args, **kwargs)

    def __str__(self):
        # The pool file is addressed by its path.
        return self.tf.name

    def __repr__(self):
        return repr(self.tf.name)

    def __enter__(self):
        self.tf.__enter__()
        return self

    def __exit__(self, exc, value, tb):
        return self.tf.__exit__(exc, value, tb)

    def register_to_close(self, file_handle):
        """Arrange for ``file_handle`` to be closed at interpreter exit."""
        def _close_at_exit():
            # Best effort: closing may fail if the handle is already gone.
            try:
                file_handle.close()
                print('closed file {}'.format(self))
            except Exception:
                print('Could not close file {}'.format(self))
        atexit.register(_close_at_exit)
Example #10
0
def sky_share(subproc, directory, files):
    """Run ``sky share`` on *files* under *directory*; return a future that
    resolves to the resulting ``rbtorrent:`` id.

    NOTE(review): ``stdout=out.name`` hands Popen the temp file *path*, not
    the file object -- assumes ``subproc.Popen`` accepts a path there;
    confirm at its definition.
    """
    out = NamedTemporaryFile('w')

    argv = ['sky', 'share', '-d', directory] + files
    logging.debug(argv)

    # FIXME collect STDERR
    try:
        p = subproc.Popen(argv, stdout=out.name)
    except:
        # Spawn failed: re-raise the original error, but clean up the temp
        # file first (best effort; cleanup errors are swallowed).
        try:
            raise
        finally:
            try:
                out.__exit__(None, None, None)
            except:
                pass

    def finalize(exit_code):
        # Runs when the subprocess future completes: validate the exit
        # status, then parse the id that `sky share` wrote to the temp file.
        try:
            status = exit_code.get()
            if status:
                raise RuntimeError("sky share failed with exit status: %d" % status)

            with open(out.name) as in_:
                id = in_.readline().rstrip('\n')

            if not id.startswith('rbtorrent:'):
                raise RuntimeError('Malformed output of sky share: %s' % id[:100])

            return id

        finally:
            # Always drop the temp file; cleanup failure is non-fatal.
            try:
                out.__exit__(None, None, None)
            except:
                pass

    return wrap_future(p.get_returncode_future(), finalize)
Example #11
0
class working_file(object):
    """A context manager for managing a temporary file that will be moved
    to a non-temporary location if no exceptions are raised in the context.

    Parameters
    ----------
    final_path : str
        The location to move the file when committing.
    *args, **kwargs
        Forwarded to NamedTemporaryFile.

    Notes
    -----
    The file is moved on __exit__ if there are no exceptions.
    ``working_file`` uses :func:`shutil.copyfile` to move the actual files,
    meaning it has as strong of guarantees as :func:`shutil.copyfile`.
    """
    def __init__(self, final_path, *args, **kwargs):
        self._tmpfile = NamedTemporaryFile(*args, **kwargs)
        self._final_path = final_path

    def _commit(self):
        """Sync the temporary file to the final path.
        """
        # BUG FIX: _commit runs (from __exit__) while the temp file is
        # still open, so buffered writes were not yet on disk and copyfile
        # silently copied a short/empty file.  Flush first.
        if not self._tmpfile.closed:
            self._tmpfile.flush()
        copyfile(self.name, self._final_path)

    def __getattr__(self, attr):
        # Delegate everything else (write, flush, name, ...) to the file.
        return getattr(self._tmpfile, attr)

    def __enter__(self):
        self._tmpfile.__enter__()
        return self

    def __exit__(self, *exc_info):
        # Commit before the temp file's own __exit__ deletes it.
        if exc_info[0] is None:
            self._commit()
        self._tmpfile.__exit__(*exc_info)
Example #12
0
class FileReplacer(object):
    """
    Create or replace a file in filesystem atomically.

    Acts as a context manager.  Entering the context returns a file
    handle to a writable (hidden) temporary file holding the contents
    until moving the temporary file to the destination path on succesful
    close (at context manager exit).  Any existing file at the
    destination path will be replaced.

    Contents of the temporary file will be discarded, if any exception
    is raised while in the context.
    """
    # Class-level default so __del__ is safe even if __init__ failed early.
    tmpfile = None

    def __init__(self, dest_path):
        self.dest_path = dest_path
        # Hidden sibling of the destination, e.g. ".<name>-XXXX".
        self.tmpfile = NamedTemporaryFile(
            dir=os.path.dirname(dest_path),
            prefix=('.' + os.path.basename(dest_path) + '-'),
            delete=False)

    def __enter__(self):
        return self.tmpfile.__enter__()

    def __exit__(self, exc_type=None, exc_val=None, exc_tb=None):
        result = self.tmpfile.__exit__(exc_type, exc_val, exc_tb)
        # Promote the temp file only on a clean exit.
        self.close(do_replace=(exc_type is None))
        return result

    def __del__(self):
        # Last-resort cleanup; never promotes a half-written file.
        self.close(do_replace=False)

    def close(self, do_replace=True):
        """Close the temp file; replace dest_path with it unless told not to.

        Returns True when the destination was actually replaced.
        """
        replaced = False
        tmp = self.tmpfile
        if not tmp:
            return replaced
        tmppath = tmp.name
        try:
            tmp.close()
            self.tmpfile = None
            if do_replace:
                replace(tmppath, self.dest_path)
                replaced = True
        finally:
            # Never leave the hidden temp file behind.
            if not replaced:
                os.unlink(tmppath)
        return replaced
Example #13
0
class ZipGenerator(object):
    """
    Generates the zip. Acts as context manager to ensure that all temporary
    files are deleted after usage.
    """

    def __init__(self, path_normalizer=normalize_path):
        self.empty = True
        self.path_normalizer = path_normalizer
        # BUG FIX: initialize explicitly so generate() raises the intended
        # StandardError instead of AttributeError when the generator is
        # used without entering the context manager.
        self.tmp_file = None
        self.zip_file = None

    def __enter__(self):
        # Zip data is written to a temp file that is deleted on __exit__.
        self.tmp_file = NamedTemporaryFile(prefix="plone_zipexport_")
        self.tmp_file.__enter__()
        self.zip_file = ZipFile(self.tmp_file.name, "w", allowZip64=True)
        self.zip_file.__enter__()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.zip_file.__exit__(exc_type, exc_value, traceback)
        self.tmp_file.__exit__(exc_type, exc_value, traceback)

    def add_folder(self, folder_path):
        """Create an empty folder entry for ``folder_path`` in the zip."""
        folder_path = safe_unicode(folder_path)

        # Always add a slash at the end of the path
        folder_path = u'{}/'.format(folder_path.strip('/'))

        # Creates a new empty folder
        self.zip_file.writestr(zipfile.ZipInfo(folder_path), '')

        self.empty = False

    def add_file(self, file_path, file_pointer):
        """Add ``file_pointer`` contents under the (normalized) path."""
        if self.path_normalizer is not None:
            file_path = self.path_normalizer(file_path)
        else:
            file_path = safe_unicode(file_path)

        # paths in zipfile do not have a / at the root
        file_path = file_path.strip('/')

        file_path = self.generate_unique_filepath(file_path)

        if not self.check_disk_has_space_for_file(file_pointer):
            raise NotEnoughSpaceOnDiskException()

        try:
            self.zip_file.writefile(file_pointer, file_path)
        except RuntimeError:
            raise StandardError("ZipFile already generated/closed. "
                                "Please add all files before generating.")
        self.empty = False

    def generate_unique_filepath(self, file_path):
        """Return ``file_path``, suffixed with ' (N)' if already present."""
        if file_path not in self.zip_file.namelist():
            return file_path

        path, name = os.path.split(file_path)
        name, ext = os.path.splitext(name)

        for i in xrange(2, sys.maxint):
            new_filename = os.path.join(path, '%s (%d)%s' % (name, i, ext))
            if new_filename not in self.zip_file.namelist():
                return new_filename

    def check_disk_has_space_for_file(self, file_d):
        """Return True when the temp file's filesystem can hold ``file_d``."""
        disk_stat = os.statvfs(self.tmp_file.name)
        bytes_free = disk_stat.f_frsize * disk_stat.f_bavail
        # Measure the remaining size from the current read position without
        # disturbing it.
        position = file_d.tell()
        file_d.seek(0, os.SEEK_END)
        file_size = file_d.tell() - position
        file_d.seek(position)
        return file_size < bytes_free

    @property
    def is_empty(self):
        # True until the first file or folder has been added.
        return self.empty

    def generate(self):
        """Close the zip and return the backing temporary file."""
        if self.tmp_file is None:
            raise StandardError(
                "Please use ZipGenerator as a context manager.")
        self.zip_file.close()
        return self.tmp_file
Example #14
0
class ZipGenerator(object):
    """
    Generates the zip. Acts as context manager to ensure that all temporary
    files are deleted after usage.
    """
    def __init__(self, path_normalizer=normalize_path):
        self.empty = True
        self.path_normalizer = path_normalizer
        # BUG FIX: initialize explicitly so generate() raises the intended
        # StandardError instead of AttributeError when the generator is
        # used without entering the context manager.
        self.tmp_file = None
        self.zip_file = None

    def __enter__(self):
        # Zip data is written to a temp file that is deleted on __exit__.
        self.tmp_file = NamedTemporaryFile(prefix="plone_zipexport_")
        self.tmp_file.__enter__()
        self.zip_file = ZipFile(self.tmp_file.name, "w", allowZip64=True)
        self.zip_file.__enter__()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.zip_file.__exit__(exc_type, exc_value, traceback)
        self.tmp_file.__exit__(exc_type, exc_value, traceback)

    def add_folder(self, folder_path):
        """Create an empty folder entry for ``folder_path`` in the zip."""
        folder_path = safe_unicode(folder_path)

        # Always add a slash at the end of the path
        folder_path = u'{}/'.format(folder_path.strip('/'))

        # Creates a new empty folder
        self.zip_file.writestr(zipfile.ZipInfo(folder_path), '')

        self.empty = False

    def add_file(self, file_path, file_pointer):
        """Add ``file_pointer`` contents under the (normalized) path."""
        if self.path_normalizer is not None:
            file_path = self.path_normalizer(file_path)
        else:
            file_path = safe_unicode(file_path)

        # paths in zipfile do not have a / at the root
        file_path = file_path.strip('/')

        file_path = self.generate_unique_filepath(file_path)

        if not self.check_disk_has_space_for_file(file_pointer):
            raise NotEnoughSpaceOnDiskException()

        try:
            self.zip_file.writefile(file_pointer, file_path)
        except RuntimeError:
            raise StandardError("ZipFile already generated/closed. "
                                "Please add all files before generating.")
        self.empty = False

    def generate_unique_filepath(self, file_path):
        """Return ``file_path``, suffixed with ' (N)' if already present."""
        if file_path not in self.zip_file.namelist():
            return file_path

        path, name = os.path.split(file_path)
        name, ext = os.path.splitext(name)

        for i in xrange(2, sys.maxint):
            new_filename = os.path.join(path, '%s (%d)%s' % (name, i, ext))
            if new_filename not in self.zip_file.namelist():
                return new_filename

    def check_disk_has_space_for_file(self, file_d):
        """Return True when the temp file's filesystem can hold ``file_d``."""
        disk_stat = os.statvfs(self.tmp_file.name)
        bytes_free = disk_stat.f_frsize * disk_stat.f_bavail
        # Measure the remaining size from the current read position without
        # disturbing it.
        position = file_d.tell()
        file_d.seek(0, os.SEEK_END)
        file_size = file_d.tell() - position
        file_d.seek(position)
        return file_size < bytes_free

    @property
    def is_empty(self):
        # True until the first file or folder has been added.
        return self.empty

    def generate(self):
        """Close the zip and return the backing temporary file."""
        if self.tmp_file is None:
            raise StandardError(
                "Please use ZipGenerator as a context manager.")
        self.zip_file.close()
        return self.tmp_file
class CaseExporterBase:
    """Base class for export of (filtered) case data from single case or all cases of a project.
    """

    #: The query class to use for building single-case queries.
    query_class_single_case = None
    #: The query class to use for building project-wide queries.
    query_class_project_cases = None

    def __init__(self, job, case_or_project):
        """Initialize the exporter for a single ``Case`` or a whole project.

        ``job`` provides logging and the query arguments; variants come
        from ``case_or_project`` (a ``Case`` instance or a project).
        """
        #: The ``ExportFileBgJob`` or ``DistillerSubmissionBgJob`` to use for logging.  Variants are obtained
        #: from ``case_or_project``.
        self.job = job
        #: The case to export for, if any.
        self.case = None
        #: The project to export for, if any.
        self.project = None
        # Exactly one of self.case / self.project is set; all later methods
        # branch on self.project being truthy.
        if isinstance(case_or_project, Case):
            self.case = case_or_project
        else:
            self.project = case_or_project
        #: The SQL Alchemy connection to use.
        self._alchemy_engine = None
        #: The query arguments.
        self.query_args = job.query_args
        #: The named temporary file object to use for file handling.
        self.tmp_file = None
        #: The wrapper for running queries.
        self.query = None
        if self.project:
            self.query = self.query_class_project_cases(
                self.project, self.get_alchemy_engine())
        else:
            self.query = self.query_class_single_case(
                self.case, self.get_alchemy_engine())
        #: The name of the selected members.
        self.members = list(self._yield_members())
        #: The column information.
        self.columns = list(self._yield_columns(self.members))

    def get_alchemy_engine(self):
        if not self._alchemy_engine:
            self._alchemy_engine = SQLALCHEMY_ENGINE
        return self._alchemy_engine

    def _is_prioritization_enabled(self):
        """Return whether prioritization is enabled in this query."""
        # Short-circuit on the global feature flag before inspecting the
        # per-query arguments.
        args = self.query_args
        return settings.VARFISH_ENABLE_EXOMISER_PRIORITISER and all((
            args.get("prio_enabled"),
            args.get("prio_algorithm"),
            args.get("prio_hpo_terms", []),
        ))

    def _is_pathogenicity_enabled(self):
        """Return whether pathogenicity scoring is enabled in this query."""
        # Short-circuit on the global feature flag before inspecting the
        # per-query arguments.
        args = self.query_args
        return settings.VARFISH_ENABLE_CADD and all((
            args.get("patho_enabled"),
            args.get("patho_score"),
        ))

    def _yield_members(self):
        """Get list of selected members."""
        if self.project:
            yield "sample"
        else:
            for m in self.job.case.get_filtered_pedigree_with_samples():
                if self.query_args.get("%s_export" % m["patient"], False):
                    yield m["patient"]

    def _yield_columns(self, members):
        """Yield column information.

        Yields one dict per column with keys ``name``, ``title``, ``type``
        and ``fixed``.  Fixed columns come first, then per-member format
        columns for each name in ``members``.
        """
        # Project-wide export prepends a sample-name column.
        if self.project:
            header = [("sample_name", "Sample", str)]
        else:
            header = []
        header += HEADER_FIXED
        # Optional score columns depend on which scorings are enabled.
        if self._is_prioritization_enabled():
            header += HEADERS_PHENO_SCORES
        if self._is_pathogenicity_enabled():
            header += HEADERS_PATHO_SCORES
        if self._is_prioritization_enabled(
        ) and self._is_pathogenicity_enabled():
            header += HEADERS_JOINT_SCORES
        if self.query_args["export_flags"]:
            header += HEADER_FLAGS
        if self.query_args["export_comments"]:
            header += HEADER_COMMENTS
        for lst in header:
            # Fixed columns carry fixed=True.
            yield dict(
                zip(("name", "title", "type", "fixed"),
                    list(lst) + [True]))
        for member in members:
            # Per-member columns are named "<member>.<field>".
            for name, title, type_ in HEADER_FORMAT:
                yield {
                    "name": "%s.%s" % (member, name),
                    "title": "%s %s" % (member, title),
                    "type": type_,
                    "fixed": False,
                }

    def _yield_smallvars(self):
        """Use this for yielding the resulting small variants one-by-one."""
        prev_chrom = None
        self.job.add_log_entry("Executing database query...")
        with contextlib.closing(self.query.run(self.query_args)) as result:
            self.job.add_log_entry("Executing phenotype score query...")
            # Materialize the result so it can be annotated in passes.
            _result = list(result)
            if self._is_prioritization_enabled():
                gene_scores = self._fetch_gene_scores(
                    [entry.entrez_id for entry in _result])
                _result = annotate_with_phenotype_scores(_result, gene_scores)
            if self._is_pathogenicity_enabled():
                variant_scores = self._fetch_variant_scores([(
                    entry["chromosome"],
                    entry["start"],
                    entry["reference"],
                    entry["alternative"],
                ) for entry in _result])
                _result = annotate_with_pathogenicity_scores(
                    _result, variant_scores)
            if self._is_prioritization_enabled(
            ) and self._is_pathogenicity_enabled():
                _result = annotate_with_joint_scores(_result)
            self.job.add_log_entry("Writing output file...")
            for small_var in _result:
                # Log progress once per chromosome change.
                if small_var.chromosome != prev_chrom:
                    self.job.add_log_entry("Now on chromosome chr{}".format(
                        small_var.chromosome))
                    prev_chrom = small_var.chromosome
                if self.project:
                    # Project export: one proxy row per sample genotype.
                    for sample in sorted(small_var.genotype.keys()):
                        yield RowWithSampleProxy(small_var, sample)
                else:
                    yield small_var

    def _fetch_gene_scores(self, entrez_ids):
        if self._is_prioritization_enabled():
            try:
                prio_algorithm = self.query_args.get("prio_algorithm")
                hpo_terms = tuple(
                    sorted(self.query_args.get("prio_hpo_terms_curated", [])))
                return {
                    str(gene_id): score
                    for gene_id, _, score, _ in prioritize_genes(
                        entrez_ids, hpo_terms, prio_algorithm)
                }
            except ConnectionError as e:
                self.job.add_log_entry(e)
        else:
            return {}

    def _fetch_variant_scores(self, variants):
        if self._is_pathogenicity_enabled():
            try:
                patho_score = self.query_args.get("patho_score")
                scorer_factory = VariantScoresFactory()
                scorer = scorer_factory.get_scorer(patho_score, variants,
                                                   self.job.bg_job.user)
                return {
                    "-".join([
                        score["release"],
                        score["chromosome"],
                        str(score["start"]),
                        score["reference"],
                        score["alternative"],
                    ]): (score["score"], score["info"])
                    for score in scorer.score()
                }
            except ConnectionError as e:
                self.job.add_log_entry(e)
        else:
            return {}

    def _get_named_temporary_file_args(self):
        """Return extra kwargs for ``NamedTemporaryFile``; override in sub class."""
        return {}

    def __enter__(self):
        # Create the backing temporary file, then let the subclass open any
        # writers on top of it via _open().
        self.tmp_file = NamedTemporaryFile(
            **self._get_named_temporary_file_args())
        self.tmp_file.__enter__()
        self._open()
        return self

    def __exit__(self, exc, value, tb):
        # Dispose of the temporary file first, then subclass resources.
        result = self.tmp_file.__exit__(exc, value, tb)
        self._close()
        self.tmp_file = None
        return result

    def generate(self):
        """Perform data generation and return all data."""
        return self.write_tmp_file().read()

    def write_tmp_file(self):
        """Write generated data to temporary file and return file-like object for reading data from."""
        self._write_leading()
        self._write_variants()
        self._write_trailing()
        #: Rewind temporary file to beginning and return it.
        self.tmp_file.seek(0)
        return self.tmp_file

    def _open(self):
        """Override with action on opening the file (no-op by default)."""

    def _close(self):
        """Override with action on closing the file (no-op by default)."""

    def _write_leading(self):
        """Write out anything before the per-variant data.

        Override in sub class.
        """

    def _write_variants(self):
        """Write out the actual data, override called functions rather than this one.
        """
        # Template method: subclasses customize the four hooks below.
        self._begin_write_variants()
        self._write_variants_header()
        self._write_variants_data()
        self._end_write_variants()

    def _begin_write_variants(self):
        """Fill with actions to execute before writing variants."""

    def _write_variants_header(self):
        """Fill with actions to write the variant header."""

    def _write_variants_data(self):
        """Fill with actions to write the variant data."""

    def _end_write_variants(self):
        """Fill with actions to execute after writing variants."""

    def _write_trailing(self):
        """Write out anything after the per-variant data.
Example #16
0
class LazyZipOverHTTP:
    """File-like object mapped to a ZIP file over HTTP.

    This uses HTTP range requests to lazily fetch the file's content,
    which is supposed to be fed to ZipFile.  If such requests are not
    supported by the server, raise HTTPRangeRequestUnsupported
    during initialization.
    """
    def __init__(self,
                 url: str,
                 session: PipSession,
                 chunk_size: int = CONTENT_CHUNK_SIZE) -> None:
        # A HEAD request yields the total size without transferring the body.
        head = session.head(url, headers=HEADERS)
        raise_for_status(head)
        assert head.status_code == 200
        self._session, self._url, self._chunk_size = session, url, chunk_size
        self._length = int(head.headers["Content-Length"])
        # Sparse local backing store, pre-sized to the remote length so that
        # downloaded ranges can be written at their absolute offsets.
        self._file = NamedTemporaryFile()
        self.truncate(self._length)
        # _left[i]/_right[i] delimit the i-th downloaded byte interval; both
        # lists stay sorted and the intervals stay disjoint (see _merge).
        self._left: List[int] = []
        self._right: List[int] = []
        if "bytes" not in head.headers.get("Accept-Ranges", "none"):
            raise HTTPRangeRequestUnsupported("range request is not supported")
        self._check_zip()

    @property
    def mode(self) -> str:
        """Opening mode, which is always rb."""
        return "rb"

    @property
    def name(self) -> str:
        """Path to the underlying file."""
        return self._file.name

    def seekable(self) -> bool:
        """Return whether random access is supported, which is True."""
        return True

    def close(self) -> None:
        """Close the file."""
        self._file.close()

    @property
    def closed(self) -> bool:
        """Whether the file is closed."""
        return self._file.closed

    def read(self, size: int = -1) -> bytes:
        """Read up to size bytes from the object and return them.

        As a convenience, if size is unspecified or -1,
        all bytes until EOF are returned.  Fewer than
        size bytes may be returned if EOF is reached.
        """
        # Fetch at least one full chunk so small sequential reads do not
        # translate into one HTTP range request each.
        download_size = max(size, self._chunk_size)
        start, length = self.tell(), self._length
        stop = length if size < 0 else min(start + download_size, length)
        start = max(0, stop - download_size)
        self._download(start, stop - 1)
        return self._file.read(size)

    def readable(self) -> bool:
        """Return whether the file is readable, which is True."""
        return True

    def seek(self, offset: int, whence: int = 0) -> int:
        """Change stream position and return the new absolute position.

        Seek to offset relative position indicated by whence:
        * 0: Start of stream (the default).  pos should be >= 0;
        * 1: Current position - pos may be negative;
        * 2: End of stream - pos usually negative.
        """
        return self._file.seek(offset, whence)

    def tell(self) -> int:
        """Return the current position."""
        return self._file.tell()

    def truncate(self, size: Optional[int] = None) -> int:
        """Resize the stream to the given size in bytes.

        If size is unspecified resize to the current position.
        The current stream position isn't changed.

        Return the new file size.
        """
        return self._file.truncate(size)

    def writable(self) -> bool:
        """Return False."""
        return False

    def __enter__(self) -> "LazyZipOverHTTP":
        self._file.__enter__()
        return self

    def __exit__(self, *exc: Any) -> Optional[bool]:
        return self._file.__exit__(*exc)

    @contextmanager
    def _stay(self) -> Iterator[None]:
        """Return a context manager keeping the position.

        At the end of the block, seek back to original position.
        """
        pos = self.tell()
        try:
            yield
        finally:
            self.seek(pos)

    def _check_zip(self) -> None:
        """Check and download until the file is a valid ZIP."""
        end = self._length - 1
        # Try progressively longer suffixes of the remote file: the ZIP
        # end-of-central-directory records live at the end, so ZipFile
        # usually succeeds after only the last chunk(s) have been fetched.
        for start in reversed(range(0, end, self._chunk_size)):
            self._download(start, end)
            with self._stay():
                try:
                    # For read-only ZIP files, ZipFile only needs
                    # methods read, seek, seekable and tell.
                    ZipFile(self)  # type: ignore
                except BadZipfile:
                    pass
                else:
                    break

    def _stream_response(self,
                         start: int,
                         end: int,
                         base_headers: Dict[str, str] = HEADERS) -> Response:
        """Return HTTP response to a range request from start to end."""
        headers = base_headers.copy()
        headers["Range"] = f"bytes={start}-{end}"
        # TODO: Get range requests to be correctly cached
        headers["Cache-Control"] = "no-cache"
        return self._session.get(self._url, headers=headers, stream=True)

    def _merge(self, start: int, end: int, left: int,
               right: int) -> Iterator[Tuple[int, int]]:
        """Return an iterator of intervals to be fetched.

        Args:
            start (int): Start of needed interval
            end (int): End of needed interval
            left (int): Index of first overlapping downloaded data
            right (int): Index after last overlapping downloaded data
        """
        # Already-downloaded intervals overlapping [start, end].
        lslice, rslice = self._left[left:right], self._right[left:right]
        # Grow the needed interval so it fully covers the overlapped ones.
        i = start = min([start] + lslice[:1])
        end = max([end] + rslice[-1:])
        # Yield only the gaps between consecutive downloaded intervals.
        for j, k in zip(lslice, rslice):
            if j > i:
                yield i, j - 1
            i = k + 1
        if i <= end:
            yield i, end
        # Replace the overlapped intervals with the single merged interval,
        # preserving the sorted/disjoint invariant of _left/_right.
        self._left[left:right], self._right[left:right] = [start], [end]

    def _download(self, start: int, end: int) -> None:
        """Download bytes from start to end inclusively."""
        # _stay restores the caller's read position after the writes below.
        with self._stay():
            # Locate the downloaded intervals that overlap [start, end].
            left = bisect_left(self._right, start)
            right = bisect_right(self._left, end)
            for start, end in self._merge(start, end, left, right):
                response = self._stream_response(start, end)
                response.raise_for_status()
                self.seek(start)
                for chunk in response_chunks(response, self._chunk_size):
                    self._file.write(chunk)
class LazyZipOverHTTP(object):
    """File-like object mapped to a ZIP file over HTTP.

    This uses HTTP range requests to lazily fetch the file's content,
    which is supposed to be fed to ZipFile.  If such requests are not
    supported by the server, raise HTTPRangeRequestUnsupported
    during initialization.
    """

    def __init__(self, url, session, chunk_size=CONTENT_CHUNK_SIZE):
        # type: (str, PipSession, int) -> None
        """Probe the URL via HEAD and set up sparse local storage."""
        head = session.head(url, headers=HEADERS)
        raise_for_status(head)
        assert head.status_code == 200
        self._session = session
        self._url = url
        self._chunk_size = chunk_size
        self._length = int(head.headers['Content-Length'])
        # Sparse backing file pre-sized to the full remote length.
        self._file = NamedTemporaryFile()
        self.truncate(self._length)
        # Sorted start/end offsets of byte ranges already downloaded.
        self._left = []  # type: List[int]
        self._right = []  # type: List[int]
        if 'bytes' not in head.headers.get('Accept-Ranges', 'none'):
            raise HTTPRangeRequestUnsupported('range request is not supported')
        self._check_zip()

    @property
    def mode(self):
        # type: () -> str
        """Opening mode; the lazily mapped file is always binary read-only."""
        return 'rb'

    @property
    def name(self):
        # type: () -> str
        """Filesystem path of the backing temporary file."""
        return self._file.name

    def seekable(self):
        # type: () -> bool
        """Random access is always supported; return True."""
        return True

    def close(self):
        # type: () -> None
        """Close the backing temporary file."""
        self._file.close()

    @property
    def closed(self):
        # type: () -> bool
        """True once the backing temporary file has been closed."""
        return self._file.closed

    def read(self, size=-1):
        # type: (int) -> bytes
        """Read up to size bytes from the object and return them.

        As a convenience, if size is unspecified or -1,
        all bytes until EOF are returned.  Fewer than
        size bytes may be returned if EOF is reached.
        """
        position = self.tell()
        length = self._length
        # Fetch at least one chunk per call to amortize HTTP requests.
        fetch_size = max(size, self._chunk_size)
        if size < 0:
            stop = length
        else:
            stop = min(position + fetch_size, length)
        begin = max(0, stop - fetch_size)
        self._download(begin, stop - 1)
        return self._file.read(size)

    def readable(self):
        # type: () -> bool
        """Reading is always supported; return True."""
        return True

    def seek(self, offset, whence=0):
        # type: (int, int) -> int
        """Move the stream position and return the new absolute position.

        ``whence`` follows the usual io convention:
        * 0: from the start of the stream (offset should be >= 0);
        * 1: relative to the current position (offset may be negative);
        * 2: relative to the end of the stream (offset usually negative).
        """
        return self._file.seek(offset, whence)

    def tell(self):
        # type: () -> int
        """Return the current stream position."""
        return self._file.tell()

    def truncate(self, size=None):
        # type: (Optional[int]) -> int
        """Resize the stream to ``size`` bytes and return the new size.

        When ``size`` is None the stream is resized to the current
        position.  The stream position itself is left untouched.
        """
        return self._file.truncate(size)

    def writable(self):
        # type: () -> bool
        """Writing is not supported; always return False."""
        return False

    def __enter__(self):
        # type: () -> LazyZipOverHTTP
        """Enter the underlying file's context and return self."""
        self._file.__enter__()
        return self

    def __exit__(self, *exc):
        # type: (*Any) -> Optional[bool]
        """Delegate context-manager cleanup to the underlying file."""
        return self._file.__exit__(*exc)

    @contextmanager
    def _stay(self):
        # type: ()-> Iterator[None]
        """Keep the stream position unchanged across the managed block."""
        saved = self.tell()
        try:
            yield
        finally:
            # Always restore, even if the block raised.
            self.seek(saved)

    def _check_zip(self):
        # type: () -> None
        """Check and download until the file is a valid ZIP.

        Downloads progressively longer suffixes of the remote file until
        ZipFile can parse what has been fetched.
        """
        end = self._length - 1
        for start in reversed(range(0, end, self._chunk_size)):
            self._download(start, end)
            parsed = True
            with self._stay():
                try:
                    # For read-only ZIP files, ZipFile only needs
                    # methods read, seek, seekable and tell.
                    ZipFile(self)  # type: ignore
                except BadZipfile:
                    parsed = False
            if parsed:
                break

    def _stream_response(self, start, end, base_headers=HEADERS):
        # type: (int, int, Dict[str, str]) -> Response
        """Return HTTP response to a range request from start to end."""