Example #1
0
def _get_config(archive: tarfile.TarFile) -> Tuple[Dict, Dict, File]:
    """
    Extracts Docker image archive manifest and configuration.
    Returns a tuple with:
    - the deserialized manifest,
    - the deserialized configuration,
    - the configuration File object to scan.

    :raises InvalidDockerArchiveException: if the manifest or config file is
        missing, unreferenced, or cannot be extracted.
    """
    manifest_file = archive.extractfile("manifest.json")
    if manifest_file is None:
        raise InvalidDockerArchiveException("No manifest file found.")

    manifest = json.load(manifest_file)[0]

    config_file_path = manifest.get("Config")
    if config_file_path is None:
        # A manifest without a "Config" entry is malformed; previously this
        # fell through to getmember(None).
        raise InvalidDockerArchiveException("No config file found.")

    try:
        # getmember() raises KeyError for unknown paths; it never returns
        # None, so the original `is None` check was dead code.
        config_file_info = archive.getmember(config_file_path)
    except KeyError:
        raise InvalidDockerArchiveException("No config file found.")

    config_file = archive.extractfile(config_file_info)
    if config_file is None:
        raise InvalidDockerArchiveException(
            "Config file could not be extracted.")

    config_file_content = config_file.read().decode()

    return (
        manifest,
        json.loads(config_file_content),
        File(config_file_content, filename="Dockerfile or build-args"),
    )
Example #2
0
def read_file_from_image(img: tarfile.TarFile,
                         file_path: str,
                         autoclose=False) -> bytes:
    """
    Read the whole content of *file_path* from an opened image tarball.

    :param img: opened tar archive of the image
    :param file_path: path of the member to read
    :param autoclose: when True, close the extracted file object after reading
    :raises KeyError: if the member is not a regular file or link
        (extractfile() returns None in that case, which previously caused an
        opaque AttributeError)
    """
    fd = img.extractfile(file_path)
    if fd is None:
        # extractfile() returns None for directories and special members.
        raise KeyError("{} is not a regular file".format(file_path))
    if autoclose:
        with closing(fd) as fobj:
            return fobj.read()
    # Without autoclose the handle is intentionally left open (original
    # behaviour); the caller may still be streaming from it.
    return fd.read()
Example #3
0
def read_file_from_image(img: tarfile.TarFile,
                         file_path: str,
                         autoclose=False) -> bytes:
    """Return the raw bytes of *file_path* stored inside the image tarball.

    When *autoclose* is true the extracted file handle is closed once the
    content has been read; otherwise it is left open for the caller.
    """
    if not autoclose:
        return img.extractfile(file_path).read()
    with closing(img.extractfile(file_path)) as handle:
        return handle.read()
Example #4
0
def store_song(connection, tar: tarfile.TarFile, song: SongInfo):
    """
    Insert *song*'s metadata into the karaoke_song table and upload its
    assets (notes, mp3, cover, optional background/video) from *tar* to S3.

    NOTE(review): objects are uploaded with a public-read ACL to the
    'music.ponytone.online' bucket, inside the DB transaction — a failed
    upload rolls the INSERT back, but already-uploaded objects remain.
    """
    client = boto3.client('s3')
    opts = {'ACL': 'public-read', 'Bucket': 'music.ponytone.online'}
    with connection:
        with connection.cursor() as cur:
            stuff = song._asdict()
            if song.parts:
                # wrap for insertion into a JSON column
                stuff['parts'] = psycopg2.extras.Json(song.parts)
            q = cur.execute(
                """
                INSERT INTO karaoke_song (title, artist, transcriber, genre, updated, "language", "length",
                                          preview_start, song_year, is_mlk, cover_image, parts)
                VALUES (%(title)s, %(artist)s, %(transcriber)s, %(genre)s, %(updated)s, %(language)s, %(length)s,
                        %(preview_start)s, %(song_year)s, %(is_mlk)s, %(cover)s, %(parts)s)
                RETURNING id""", stuff)
            id, = cur.fetchone()
            print(f"Inserted into DB: #{id}")

            # all asset paths are relative to the notes file's directory
            dirname = os.path.dirname(song.notes)
            f = tar.extractfile(song.notes)
            client.put_object(Body=f,
                              Key=f"{id}/notes.txt",
                              ContentType="text/plain",
                              **opts)
            f = tar.extractfile(os.path.join(dirname, song.mp3))
            # NOTE(review): the "Uploaded ..." messages print before the
            # corresponding put_object call actually runs
            print("Uploaded MP3")
            client.put_object(Body=f,
                              Key=f"{id}/{song.mp3}",
                              ContentType="audio/mpeg",
                              **opts)
            f = tar.extractfile(os.path.join(dirname, song.cover))
            print("Uploaded cover")
            client.put_object(Body=f,
                              Key=f"{id}/{song.cover}",
                              ContentType=mimetypes.guess_type(song.cover)[0],
                              **opts)
            if song.background:
                f = tar.extractfile(os.path.join(dirname, song.background))
                client.put_object(Body=f,
                                  Key=f"{id}/{song.background}",
                                  ContentType=mimetypes.guess_type(
                                      song.background)[0],
                                  **opts)
                print("Uploaded background")
            if song.video:
                f = tar.extractfile(os.path.join(dirname, song.video))
                client.put_object(Body=f,
                                  Key=f"{id}/{song.video}",
                                  ContentType=mimetypes.guess_type(
                                      song.video)[0],
                                  **opts)
                print("Uploaded video")
    print("Committed")
def extract_file(tar: tarfile.TarFile, name: str) -> IO[bytes]:
    """
    Helper for getting a file handle to the database file in the tar archive.
    This is needed because we don't necessarily know the name of its
    containing folder.

    :raises: TarError if the tar archive does not contain the database file
    """
    member = None
    for candidate in tar.getmembers():
        if candidate.isfile() and candidate.name.endswith(name):
            member = candidate
            break

    if member is None:
        # Because we verified the checksum earlier, this should only be
        # possible if maxmind actually served us a bad file
        raise tarfile.TarError("Tar archive did not contain the database file!")

    handle = tar.extractfile(member)
    if handle is None:
        raise tarfile.TarError("Tar archive did not contain the database file!")

    return handle
Example #6
0
  def detect( cls, target_file, magic_type ):
    """
    Detect a python sdist archive (<package>-<version>.tar.gz) and return a
    matching instance, or None when *target_file* is not one.

    :param target_file: object with a ``name`` attribute and a ``file``
        attribute holding the opened (seekable) file object
    :param magic_type: libmagic description of the file content
    """
    filename = os.path.basename( target_file.name )

    if not filename.endswith( '.tar.gz' ):
      return None

    if not magic_type.startswith( 'gzip compressed data' ):
      return None

    ( filename, _, _ ) = filename.rsplit( '.', 2 )

    try:
      ( package, version ) = filename.rsplit( '-', 1 )  # ie: cinp-0.9.2.tar.gz
    except ValueError:
      return None

    # Do not shadow the stdlib "tarfile" module, and make sure both handles
    # are closed on every exit path — the original leaked them when
    # PKG-INFO was missing.
    gzfile = GzipFile( fileobj=target_file.file, mode='r' )
    try:
      tar = TarFile( fileobj=gzfile, mode='r' )
      try:
        try:
          info = tar.extractfile( '{0}/PKG-INFO'.format( filename ) )
        except KeyError:
          return None
        if info is None:
          return None
      finally:
        tar.close()
    finally:
      gzfile.close()

    return cls( filename, package, 'all', version, 'python' )
Example #7
0
  def detect( cls, target_file, magic_type ):
    """
    Detect an Ansible Galaxy collection archive
    (<namespace>-<name>-<version>.tar.gz) and return a matching instance,
    or None when *target_file* is not one.

    :raises ValueError: when the archive looks like a collection but the
        file name does not follow the namespace-name-version format
    """
    filename = os.path.basename( target_file.name )

    if not filename.endswith( '.tar.gz' ):
      return None

    if not magic_type.startswith( 'gzip compressed data' ):
      return None

    ( filename, _, _ ) = filename.rsplit( '.', 2 )

    gzfile = GzipFile( fileobj=target_file.file, mode='r' )
    try:
      tar = TarFile( fileobj=gzfile, mode='r' )
      try:
        # TypeError covers extractfile() returning None for odd members
        manifest = json.loads( tar.extractfile( 'MANIFEST.json' ).read() )
      except ( KeyError, TypeError, json.JSONDecodeError ):
        return None
      finally:
        # the original leaked both handles when MANIFEST.json was missing
        tar.close()
    finally:
      gzfile.close()

    if 'collection_info' not in manifest:
      return None

    try:
      ( namespace, name, version ) = filename.split( '-' )
    except ValueError:
      raise ValueError( 'Unrecognized Galaxy file name Format' )

    return cls( filename, '{0}-{1}'.format( namespace, name ), 'all', version, 'galaxy' )
Example #8
0
    def load_from_file(self, f):
        """
        Load a population from the tar archive *f*: an ``info.py`` member
        holding the ``(agedesc, generation)`` tuple, plus one ``.agt``
        member per agent.

        SECURITY NOTE: ``eval`` is run on archive content; even with
        ``__builtins__`` disabled this is unsafe on untrusted archives.
        """
        tar = TarFile(f, "r")

        # load info file
        # NOTE(review): f is rebound here, shadowing the archive argument
        f = tar.extractfile("info.py")
        self.agedesc, self.generation = eval(f.read(-1), {"__builtins__": None})
        f.close()

        # load agents
        for info in tar.getmembers():
            if (splitext(info.name)[1]==".agt" and info.isfile()):
                f = tar.extractfile(info)
                self.add(Agent(self.agedesc, file = f))
                f.close()

        tar.close()
Example #9
0
def _extract_station(stations_tar: tarfile.TarFile,
                     tar_member: tarfile.TarInfo) -> "Optional[Dict[str, DataFrame]]":
    """
    Read one NOAA station CSV member from *stations_tar* and return
    ``{station_id: dataframe}`` with renamed columns, converted units and a
    derived relative_humidity column.

    Returns None for non-CSV members (the original annotation claimed a
    plain Dict, which was wrong for that path).
    """

    if not tar_member.name.endswith(".csv"):
        return None

    # Read the records from the provided station
    data = read_file(
        stations_tar.extractfile(tar_member),
        file_type="csv",
        usecols=_COLUMN_MAPPING.keys()).rename(columns=_COLUMN_MAPPING)

    # Fix data types
    noaa_station = tar_member.name.replace(".csv", "")
    data["noaa_station"] = noaa_station
    # conv_dist / conv_temp convert from NOAA units — presumably to metric;
    # confirm in their definitions (not visible here)
    data["rainfall"] = data["rainfall"].apply(conv_dist)
    data["snowfall"] = data["snowfall"].apply(conv_dist)
    data["dew_point"] = data["dew_point"].apply(conv_temp)
    for temp_type in ("average", "minimum", "maximum"):
        col = f"{temp_type}_temperature"
        data[col] = data[col].apply(conv_temp)

    # Compute the relative humidity from the dew point and average temperature
    data["relative_humidity"] = data.apply(
        lambda x: relative_humidity(x["average_temperature"], x["dew_point"]),
        axis=1)

    return {noaa_station: data}
def get_openface(file_):
    """
    Scan the first OpenFace ``*.csv`` member inside the tar *file_* and
    return the set of frame numbers whose detection failed (unsuccessful or
    confidence below 0.98). Only odd frames are considered when the module
    flag ONLY_ODD is set.
    """
    tar_file = TarFile(file_)
    try:
        csv_member = [
            x for x in tar_file.getmembers() if x.path.endswith(".csv")
        ][0]
        lines = tar_file.extractfile(csv_member).readlines()
    finally:
        # the original never closed the archive
        tar_file.close()

    failed = set()
    reference = []
    for i, line in enumerate(lines):
        split_line = line.decode("utf-8").strip().split(",")
        if i == 0:
            # header row: map column name -> column index
            reference = {x: split_line.index(x) for x in split_line}
            continue

        frame = int(split_line[reference["frame"]])
        if not ONLY_ODD or frame % 2 == 1:
            confidence = float(split_line[reference["confidence"]])
            # NOTE(review): bool() of any non-empty string is True, so
            # success is True even for "0" — looks like a latent bug; verify
            # against the CSV format before changing.
            success = bool(split_line[reference["success"]])
            if not success or confidence < 0.98:
                failed.add(frame)

    return failed
Example #11
0
def _filter_single_tar(
    in_file: tarfile.TarFile,
    remove_entries,
):
    temp_fh = tempfile.TemporaryFile()
    temptar = tarfile.TarFile(fileobj=temp_fh, mode='w')

    for tar_info in in_file:
        if not tar_info.isfile():
            temptar.addfile(tar_info)
            continue

        if tar_info.name in remove_entries:
            logging.debug(f'purging entry: {tar_info.name}')
            continue

        # copy entry
        entry = in_file.extractfile(tar_info)
        temptar.addfile(tar_info, fileobj=entry)

    size = temp_fh.tell()
    temp_fh.flush()
    temp_fh.seek(0)

    return temp_fh, size
Example #12
0
def extract_docker_layer(img: tarfile.TarFile,
                         layer_id: str,
                         extract_path: str):
    """
    Extract `<layer_id>/layer.tar` from a docker image archive into
    *extract_path*, then apply AUFS whiteouts: each `.wh.<name>` marker
    deletes both itself and the `<name>` entry it shadows.
    """
    # SECURITY NOTE: extractall() on untrusted archives allows path
    # traversal; consider the `filter="data"` argument (Python 3.12+).
    with tarfile.open(fileobj=img.extractfile('%s/layer.tar' % layer_id),
                      errorlevel=0,
                      dereference=True) as layer:

        layer.extractall(path=extract_path)

        log.debug('processing whiteouts')
        for member in layer.getmembers():
            path = member.path
            if path.startswith('.wh.') or '/.wh.' in path:
                if path.startswith('.wh.'):
                    newpath = path[4:]
                else:
                    newpath = path.replace('/.wh.', '/')

                try:
                    log.debug('removing path %s', newpath)
                    # BUG FIX: member paths are relative to extract_path,
                    # but the original unlinked relative to the CWD — the
                    # resulting ENOENT was silently swallowed below, so
                    # whiteouts were never applied unless CWD == extract_path.
                    os.unlink(os.path.join(extract_path, path))
                    os.unlink(os.path.join(extract_path, newpath))
                except OSError as err:
                    if err.errno != errno.ENOENT:
                        raise
Example #13
0
    def __init__(self, archive: tarfile.TarFile, name: str):
        """
        Open the layer member *name* from the enclosing image *archive* and
        index its contents.

        After construction:
        - ``self.files`` holds the names of this layer's ordinary entries,
        - ``self.mask`` holds paths (or directory prefixes ending in '/')
          that this layer's AUFS whiteout markers remove from lower layers.
        """
        buffer = archive.extractfile(name)
        if buffer is None:
            raise RuntimeError("No buffer for layer")

        # The base class presumably opens the stream as a tar archive and
        # exposes it as self.archive — confirm in the parent (not visible here).
        super().__init__(buffer)

        self.name = name
        self.files = set()
        self.mask = set()

        for member in self.archive.getmembers():
            path = Path(member.name)
            if not path.name.startswith('.wh.'):
                self.files.add(member.name)
                continue

            # whiteout marker: compute the directory prefix it applies to
            if len(path.parent.name) > 0:
                prefix = str(path.parent) + '/'
            else:
                prefix = ''

            if path.name == '.wh..wh..opq':
                # Discard everything in the same directory
                self.mask.add(prefix)
            else:
                # Just discard one file
                self.mask.add(prefix + path.name[4:])
def _extract_tarinfo(tf: tarfile.TarFile,
                     parent_info: Dict,
                     extensions=IMG_EXTENSIONS):
    """
    Walk *tf* once in stream order, appending image members to
    parent_info['samples'] and recursing into nested ``.tar`` members,
    whose descriptors are appended to parent_info['children'].

    Mutates *parent_info* in place and returns the total image count found
    (including nested archives). Nested tars are opened in stream mode
    ('r|'), so their members can only be read sequentially.
    """
    sample_count = 0
    for i, ti in enumerate(tf):
        if not ti.isfile():
            continue
        dirname, basename = os.path.split(ti.path)
        name, ext = os.path.splitext(basename)
        ext = ext.lower()
        if ext == '.tar':
            with tarfile.open(fileobj=tf.extractfile(ti), mode='r|') as ctf:
                child_info = dict(name=ti.name,
                                  path=os.path.join(parent_info['path'], name),
                                  ti=ti,
                                  children=[],
                                  samples=[])
                sample_count += _extract_tarinfo(ctf,
                                                 child_info,
                                                 extensions=extensions)
                _logger.debug(
                    f'{i}/?. Extracted child tarinfos from {ti.name}. {len(child_info["samples"])} images.'
                )
                parent_info['children'].append(child_info)
        elif ext in extensions:
            parent_info['samples'].append(ti)
            sample_count += 1
    return sample_count
Example #15
0
def determine_osinfo(tarfh: tarfile.TarFile) -> um.OperatingSystemId:
    '''
    tries to determine the operating system identification, roughly as specified by
        https://www.freedesktop.org/software/systemd/man/os-release.html
    and otherwise following some conventions believed to be common.

    The argument (an opened tarfile) is being read from its initial position, possibly (but
    not necessarily) to the end. The underlying stream does not need to be seekable.
    It is the caller's responsibility to close the tarfile handle after this function returns.

    The tarfile is expected to contain a directory tree from a "well-known" unix-style operating
    system distribution. In particular, the following (GNU/) Linux distributions are well-supported:
    - alpine
    - debian
    - centos

    In case nothing was recognised within the given tarfile, the returned OperatingSystemId's
    attributes will all be `None`.
    '''
    # file basenames that identify a distribution
    known_fnames = (
        'debian_version',
        'centos-release',
        'os-release',
    )

    os_info = {}

    for info in tarfh:
        fname = info.name.split('/')[-1]

        if not fname in known_fnames:
            continue

        if info.issym():
            # we assume fnames are the same (this assumption might not always be correct)
            continue

        if not info.isfile():
            continue

        # found an "interesting" file
        contents = tarfh.extractfile(info).read().decode('utf-8')

        if fname == 'os-release':
            for k, v in _parse_os_release(contents):
                if k in os_info:
                    # prefer a semver-parseable VERSION_ID over an already
                    # stored unparseable one; otherwise keep the earlier value
                    if k == 'VERSION_ID' and version.is_semver_parseable(v) and \
                        not version.is_semver_parseable(os_info[k]):
                        pass
                    else:
                        continue  # os-release has lesser precedence
                os_info[k] = v
            if os_info.get('ID') == 'ubuntu' and (ver :=
                                                  os_info.get('VERSION')):
                # of _course_ ubuntu requires a special hack
                os_info['VERSION_ID'] = ver.split(' ', 1)[0]
        elif fname == 'centos-release':
            for k, v in _parse_centos_release(contents):
                os_info[k] = v
    # NOTE(review): no return statement (and no debian_version handling) is
    # visible here — the function appears truncated in this excerpt; the
    # final um.OperatingSystemId construction is missing. Confirm upstream.
Example #16
0
 def parse_backup_label(self, basebackup_path):
     """
     Return the starting WAL segment name recorded in the ``backup_label``
     member of the base-backup tar at *basebackup_path*.

     :raises ValueError: if no "START WAL LOCATION" line is present
         (previously this surfaced as an UnboundLocalError)
     """
     tar = TarFile(basebackup_path)
     try:
         content = tar.extractfile("backup_label").read()  # pylint: disable=no-member
     finally:
         # the original leaked the archive handle
         tar.close()
     start_wal_segment = None
     for line in content.split(b"\n"):
         if line.startswith(b"START WAL LOCATION"):
             start_wal_segment = line.split(b" ")[5].strip(b")").decode("utf8")
     if start_wal_segment is None:
         raise ValueError("no START WAL LOCATION line found in backup_label")
     self.log.debug("Found: %r as starting wal segment", start_wal_segment)
     return start_wal_segment
Example #17
0
 def from_package(cls, package: TarFile, url: str, config: ConfigFile) -> 'EnotConfig':
     """
     Build an EnotConfig from the ``enot_config.json`` member of *package*.

     :param package: opened package tar archive
     :param url: source url recorded on the resulting config
     :param config: optional dep.config whose fullname, when set, overrides
         the package's own fullname
     """
     f = package.extractfile('enot_config.json')
     try:
         content = f.read()
     finally:
         # the original never closed the extracted handle
         f.close()
     conf = cls(json.loads(content.decode('utf-8')), url=url)
     if config is not None:
         if config.fullname:  # overwrite fullname by package's fullname (from dep.config).
             conf.fullname = config.fullname
     return conf
Example #18
0
    def load_from_file(self, f):
        """Load the population stored in the tar archive *f*.

        ``info.py`` supplies the ``(agedesc, generation)`` tuple; every
        ``*.agt`` regular member is loaded as an Agent and added.

        SECURITY NOTE: ``eval`` runs on archive content; even with
        ``__builtins__`` disabled this is unsafe on untrusted archives.
        """
        tar = TarFile(f, "r")

        # read the (agedesc, generation) tuple from the info member
        info_fd = tar.extractfile("info.py")
        self.agedesc, self.generation = eval(info_fd.read(-1),
                                             {"__builtins__": None})
        info_fd.close()

        # instantiate an Agent from every .agt regular file
        for member in tar.getmembers():
            if splitext(member.name)[1] == ".agt" and member.isfile():
                agent_fd = tar.extractfile(member)
                self.add(Agent(self.agedesc, file=agent_fd))
                agent_fd.close()

        tar.close()
Example #19
0
    def _readManifest(self):
        raw = TarFile(self.filename)

        manifest = raw.extractfile('manifest.json').read().decode()

        raw.close()

        self._manifest = json.loads(manifest)[0]
Example #20
0
def _extract_tar_file(tar: tarfile.TarFile, f: tarfile.TarInfo, path: Path):
    """
    Stream the tar member *f* into *path* in fixed-size chunks.

    Creates the destination's parent directory when missing — the original
    variant failed with FileNotFoundError for nested paths (its twin helper
    elsewhere in this file already did this).
    """
    if not path.parent.exists():
        path.parent.mkdir(parents=True)

    with tar.extractfile(f) as ef:
        with open(str(path), 'wb') as o:
            while True:
                r = ef.read(_TAR_BUFFER_SIZE)
                if r:
                    o.write(r)
                else:
                    break
Example #21
0
    def tar_contents(cls, tarfile_name, passwd=None, names=None):
        """
        Return ``{member_name: bytes}`` for *names* (all members when None).

        :param passwd: unused; kept for interface compatibility
        """
        results = {}
        tf = TarFile(tarfile_name)
        try:
            if names is None:
                names = tf.getnames()

            for n in names:
                results[n] = tf.extractfile(n).read()
        finally:
            # the original left the archive handle open
            tf.close()
        return results
Example #22
0
def song_info(tar: tarfile.TarFile, path: str):
    """
    Parse the UltraStar-style song text at *path* inside *tar* and build a
    SongInfo. Returns None when the text cannot be parsed or the referenced
    MP3 member is missing from the archive.
    """
    content = tar.extractfile(path)
    parsed = parse(content.read())
    if parsed is None:
        return None
    artist = parsed.get('ARTIST')
    transcriber = parsed.get('CREATOR')
    genre = parsed.get('GENRE', 'Pony')
    language = parsed.get('LANGUAGE', 'English')
    title = parsed.get('TITLE')
    cover = parsed['COVER']
    song_year = int(parsed.get('YEAR', '0')) or None
    try:
        updated = dateutil.parser.parse(
            parsed['UPDATED'],
            MLKDateParserInfo()) if 'UPDATED' in parsed else None
    except ValueError:
        print(f"Couldn't parse date: {parsed['UPDATED']}")
        updated = None
    mp3_path = os.path.join(os.path.dirname(path), parsed['MP3'])
    try:
        tar.getmember(mp3_path)
    except KeyError:
        return None
    if 'END' in parsed:
        # END is in milliseconds
        duration = int(parsed['END']) / 1000
    else:
        duration = mutagen.mp3.MP3(tar.extractfile(mp3_path)).info.length
    if 'START' in parsed:
        # START/PREVIEWSTART may use a decimal comma
        duration -= float(parsed['START'].replace(',', '.'))
    is_mlk = 'mylittlekaraoke' in parsed.get('COMMENT', '')
    mp3 = parsed['MP3']
    background = parsed.get('BACKGROUND')
    video = parsed.get('VIDEO')
    # BUG FIX: the original tested `if 'P1' and 'P2' in parsed:` which only
    # checks 'P2' ('P1' is a truthy literal) and would KeyError on
    # parsed['P1'] for songs carrying P2 without P1.
    if 'P1' in parsed and 'P2' in parsed:
        parts = [parsed['P1'], parsed['P2']]
    else:
        parts = None

    preview_start = float(parsed['PREVIEWSTART'].replace(
        ',', '.')) if 'PREVIEWSTART' in parsed else None
    return SongInfo(title, artist, genre, song_year, duration, language,
                    transcriber, is_mlk, updated, path, mp3, background, video,
                    preview_start, parts, cover)
Example #23
0
class TarFileWrapper(ArchiveFileWrapper):
    """Expose a tar archive (opened from a file handle) behind the generic
    ArchiveFileWrapper interface."""

    def __init__(self, fh, *args, **kwargs):
        # open the tar directly from the already-open binary handle
        self.archive = TarFile(fileobj=fh)
        super(TarFileWrapper, self).__init__(*args, **kwargs)

    def names(self):
        # member names in archive order
        return self.archive.getnames()

    def extract_file(self, *args, **kwarg):
        # delegate to tarfile.TarFile.extractfile (may return None for
        # non-regular members)
        return self.archive.extractfile(*args, **kwarg)
Example #24
0
class TarFileWrapper(ArchiveFileWrapper):
    """Adapter exposing a tar archive through the ArchiveFileWrapper API."""

    def __init__(self, fh, *args, **kwargs):
        # *fh* is an already-open binary file object positioned at the tar data
        self.archive = TarFile(fileobj=fh)
        super(TarFileWrapper, self).__init__(*args, **kwargs)

    def extract_file(self, *args, **kwarg):
        # returns a file object, or None for non-regular members (tarfile
        # semantics)
        return self.archive.extractfile(*args, **kwarg)

    def names(self):
        # member names in archive order
        return self.archive.getnames()
Example #25
0
def extractfile(tar_file: TarFile,
                name: str,
                filtr: Callable[[str], str],
                line: int = 1) -> Optional[Stream]:
    """Extract the specified file from a tarball using the specified filter
    and an optional line offset.

    Returns None when the member is missing or is not a regular file.
    """
    try:
        stream = tar_file.extractfile(name)
    except KeyError:
        return None
    if stream is None:
        return None
    # Iterate the file object lazily instead of readlines() (which loaded
    # the whole member into memory), and don't reuse the name of the `line`
    # offset parameter for the loop variable as the original did.
    return Stream((raw.decode('utf-8') for raw in stream), filtr, line)
def tarfile_extract_single_file(tf: tarfile.TarFile, member: str,
                                extract_path: str):
    """
    Extract *member* from *tf* and write it to *extract_path*, creating
    parent directories (mode 0o755) as needed.

    :raises KeyError: if *member* is not a regular file in the archive
    """
    print("Extracting {} to {}".format(member, extract_path))

    f_obj = tf.extractfile(member)
    if f_obj is None:
        # extractfile() returns None for directories / special members
        raise KeyError("{} is not a regular file".format(member))
    with f_obj:
        data = f_obj.read()

    parent = os.path.dirname(extract_path)
    if parent:
        # guard: os.makedirs("") raises FileNotFoundError when extract_path
        # has no directory component
        os.makedirs(parent, mode=0o755, exist_ok=True)

    with open(extract_path, "wb") as f_obj:
        f_obj.write(data)
Example #27
0
def _extract_tar_file(tar: tarfile.TarFile, f: tarfile.TarInfo, path: Path):
    """Write the tar member *f* to *path*, creating parent directories."""
    parent = path.parent
    if not parent.exists():
        parent.mkdir(parents=True)

    with tar.extractfile(f) as ef, open(str(path), 'wb') as out:
        # copy in fixed-size chunks so large members are never fully buffered
        chunk = ef.read(_TAR_BUFFER_SIZE)
        while chunk:
            out.write(chunk)
            chunk = ef.read(_TAR_BUFFER_SIZE)
Example #28
0
    def load_bin(self, bin_models: tarfile.TarFile) -> None:
        """
        Loads model weights from a tar file (binary form) into our models.

        :param bin_models: Tar File containing model weights as io.BytesIO
        :return: None
        """
        # one (file object, member name) pair per archive entry, in order
        city_models = [
            (bin_models.extractfile(member), member.name)
            for member in bin_models
        ]
        self.agent.load_bin(city_models)
Example #29
0
    def layerInfo(self, layer):
        """
        Return the parsed ``<layer>/json`` metadata for *layer*, caching
        the result in ``self._layerInfo``.
        """
        try:
            return self._layerInfo[layer]
        except KeyError:
            pass  # not cached yet — read it from the archive

        raw = TarFile(self.filename)
        try:
            config = raw.extractfile('{0}/json'.format(layer)).read().decode()
        finally:
            # close the archive even when extraction fails (original leaked it)
            raw.close()
        self._layerInfo[layer] = json.loads(config)

        return self._layerInfo[layer]
Example #30
0
def extracttarfile(tar: tarfile.TarFile, member: tarfile.TarInfo) -> Text:
    """Extract the given member from the tarfile. Return the path of the file
    where we output it. If this isn't a regular file or a link, tarfile module
    will return `None`. In this case return None as the output file name.
    """
    # NOTE(review): member.name is used unsanitized — a malicious archive
    # could escape conf['datadir'] via ".." components; verify upstream.
    outfilename = os.path.join(conf['datadir'], member.name)
    extractfile = tar.extractfile(member)

    if not extractfile:
        return None

    # create intermediate directories: nested members previously failed on
    # open() when their parent directory had not been extracted yet
    parent = os.path.dirname(outfilename)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(outfilename, 'wb') as f:
        f.write(extractfile.read())
    return outfilename
Example #31
0
    def config(self):
        """
        Return the image's parsed Config JSON, loading and caching it on
        first access (reading the manifest first when necessary).
        """
        if self._config is not None:
            return self._config

        if self._manifest is None:
            self._readManifest()

        raw = TarFile(self.filename)
        try:
            config = raw.extractfile(self._manifest['Config']).read().decode()
        finally:
            # close the archive even when extraction fails (original leaked it)
            raw.close()
        self._config = json.loads(config)

        return self._config
Example #32
0
 def test_bundler_basic(self):
     """Test the bundler to stream a tarfile."""
     with BuildSampleData() as sample_files:
         md_obj = MetaData([MetaObj(value='SomethingReal')])
         bundle_fd = NamedTemporaryFile(delete=False)
         bundle = bundler.Bundler(md_obj, sample_files)
         bundle.stream(bundle_fd)
         bundle_fd.close()
         # NOTE(review): a NamedTemporaryFile object is always truthy — this
         # only checks the handle exists, not that anything was written
         self.assertTrue(bundle_fd)
     # the streamed bundle must be a readable tar whose metadata.txt member
     # contains valid JSON
     check_tar = TarFile(bundle_fd.name, 'r')
     md_fd = check_tar.extractfile('metadata.txt')
     self.assertTrue(md_fd)
     md_bytes = md_fd.read()
     self.assertTrue(loads(md_bytes.decode('utf8')))
    def _open_archive_file(self, archive: TarFile, name: str) -> IO[bytes]:
        while True:
            member = archive.next()
            if member is None:
                break
            if member.name == name:
                fobj = archive.extractfile(member)
                if fobj is None:
                    break
                return fobj

        # noinspection PyProtectedMember
        raise ObjectDoesNotExistError(f'File {name} is missing in archive',
                                      self._file_storage._driver, archive.name)
Example #34
0
    def _open_archive_file(self, archive: TarFile, name: str) -> IO[bytes]:
        """
        Sequentially scan *archive* (via next()) for the member *name* and
        return its extracted file object.

        :raises ObjectDoesNotExistError: if the member is absent or not
            extractable (extractfile() returned None)
        """
        while True:
            member = archive.next()
            if member is None:
                # end of archive reached without a match
                break
            if member.name == name:
                fobj = archive.extractfile(member)
                if fobj is None:
                    break
                return fobj

        raise ObjectDoesNotExistError(
            'File {} is missing in archive'.format(name),
            self._file_storage._driver, archive.name)
Example #35
0
    def load_section(tf: TarFile, info: TarInfo) -> Table:
        """
        Read one catalogue section (*info*, a gzipped ``cc*.dat`` member)
        from *tf*, using the CDS format description from the archive's
        ``./ReadMe`` member.

        Rows with TYC1 == 0 are dropped; magnitude columns are converted to
        float64 values in mag units with a fixed display format.
        """
        with tf.extractfile('./ReadMe') as readme:
            col_names = ['Bmag', 'Vmag', 'e_Bmag', 'e_Vmag', 'd3', 'TYC1', 'TYC2', 'TYC3',
                         'Jmag', 'e_Jmag', 'Hmag', 'e_Hmag', 'Kmag', 'e_Kmag', 'SpType']
            reader = io_ascii.get_reader(io_ascii.Cds,
                                         readme=readme,
                                         include_names=col_names)
            reader.data.table_name = 'cc*.dat'
            print('  Loading ' + os.path.basename(info.name))
            # the data member is itself gzip-compressed inside the tar
            with tf.extractfile(info) as gzf, gzip.open(gzf, 'rb') as f:
                section = reader.read(f)

        section = section[section['TYC1'] != 0]
        parse_tyc_cols(section)

        convert_cols = ['Bmag', 'Vmag', 'e_Bmag', 'e_Vmag', 'Jmag', 'e_Jmag', 'Hmag', 'e_Hmag',
                        'Kmag', 'e_Kmag']
        for col in convert_cols:
            section[col] = section[col].astype(np.float64)
            section[col].convert_unit_to(u.mag)
            section[col].format = '.3f'

        return section
Example #36
0
def get_root_json_from_image(img: tarfile.TarFile) -> Tuple[str, dict]:
    """
    Every docker image has a root .json file with the metadata information.
    This function locates that file, loads it, and returns its name (without
    the extension) together with the parsed content.

    Returns ``(None, None)`` when no root-level json member exists.

    >>> get_docker_image_layers(img)
    ('db079554b4d2f7c65c4df3adae88cb72d051c8c3b8613eb44e86f60c945b1ca7', dict(...))
    """
    for member in img.getmembers():
        name = member.name
        if not name.endswith("json") or "/" in name:
            continue
        raw = img.extractfile(name).read()
        if hasattr(raw, "decode"):
            raw = raw.decode()
        return name.split(".")[0], json.loads(raw)

    return None, None
Example #37
0
    def test_can_put_extracted_file_from_tar(self):
        """A file object returned by TarFile.extractfile() can be streamed
        directly as an S3 object body."""
        tempdir = self.make_tempdir()
        tarname = os.path.join(tempdir, "mytar.tar")
        filename = os.path.join(tempdir, "foo")

        # Set up a file to add the tarfile.
        with open(filename, "w") as f:
            f.write("bar")

        # Setup the tar file by adding the file to it.
        # Note there is no context handler for TarFile in python 2.6
        try:
            tar = TarFile(tarname, "w")
            tar.add(filename, "foo")
        finally:
            tar.close()

        # See if an extracted file can be uploaded to s3.
        try:
            tar = TarFile(tarname, "r")
            with closing(tar.extractfile("foo")) as f:
                self.assert_can_put_object(body=f)
        finally:
            tar.close()
Example #38
0
# Wikipedia pagecount dump for 2008
FILENAME = "projectcounts-2008.tar"
# request-rate cap used for normalisation (40% of an observed maximum)
MAX_REQUESTS = 33056088 * 0.4

if __name__ == "__main__":
    # NOTE(review): this is Python 2 code (print statement below).
    tar = TarFile(FILENAME)
    inidate = datetime(year=2008, month=1, day=1)
    maxrequests = 0
    for filename in tar.getnames():
        # member names carry a "<prefix>-YYYYMMDD-HHMMSS" timestamp
        pre, date, time = filename.split("-")
        year = int(date[0:4])
        month = int(date[4:6])
        day = int(date[6:8])
        hour = int(time[0:2])
        minute = int(time[2:4])
        second = int(time[4:6])
        date = datetime(year=year, month=month, day=day, hour=hour, minute=minute, second=second)
        td = date - inidate
        # seconds elapsed since the start of 2008
        seconds = td.days * 24 * 60 * 60 + td.seconds

        f = tar.extractfile(filename)
        for line in f.readlines():
            # keep only the English Wikipedia ("en - ...") lines; the third
            # space-separated field is the request count
            if line.startswith("en -"):
                line = line.replace("\n", "").replace("\r", "")
                lineSplit = line.split(" ")
                requests = int(lineSplit[2])
                if requests > MAX_REQUESTS:
                    requests = MAX_REQUESTS
                # emit "<seconds> <normalised request rate>"
                print "%d %.2f" % (seconds, float(requests) / MAX_REQUESTS)
                if requests > maxrequests:
                    maxrequests = requests
Example #39
0
def create_new_docker_image(manifest: dict,
                            image_output_path: str,
                            img: tarfile.TarFile,
                            old_layer_digest: str,
                            new_layer_path: str,
                            new_layer_digest: str,
                            json_metadata_last_layer: dict = None,
                            json_metadata_root: dict = None):
    """
    Rewrite the docker image archive *img* into *image_output_path*,
    replacing the layer *old_layer_digest* with the layer tar at
    *new_layer_path* (digest *new_layer_digest*).

    While copying members it rewrites:
    - manifest.json with the supplied *manifest*,
    - the replaced layer's layer.tar plus its "json"/"VERSION" companions,
    - the "repositories" index so the tag points at the new digest,
    - the root <id>.json metadata (last rootfs diff_id updated).

    :param json_metadata_last_layer: optional replacement metadata for the
        layer's json file; when None the old digest is string-replaced
    :param json_metadata_root: optional replacement for the root metadata
    """
    with tarfile.open(image_output_path, "w") as s:

        for f in img.getmembers():
            log.debug("    _> Processing file: {}".format(f.name))

            # Add new manifest
            if f.name == "manifest.json":
                # Dump Manifest to JSON
                new_manifest_json = json.dumps(manifest).encode()
                replace_or_append_file_to_layer("manifest.json",
                                                new_manifest_json,
                                                s)

            #
            # NEW LAYER INFO
            #
            elif old_layer_digest in f.name:
                # Skip for old layer.tar file
                if f.name == "{}/layer.tar".format(old_layer_digest) or \
                        "/" not in f.name:

                    log.debug(
                        "    _> Replacing layer {} by {}".format(
                            f.name,
                            new_layer_digest
                        ))

                    replace_or_append_file_to_layer("{}/layer.tar".format(
                        new_layer_digest),
                        new_layer_path,
                        s)
                else:
                    #
                    # Extra files: "json" and "VERSION"
                    #
                    c = read_file_from_image(img, f.name)

                    if "json" in f.name:
                        # Modify the JSON content to add the new
                        # hash
                        if json_metadata_last_layer:
                            c = json.dumps(json_metadata_last_layer).encode()
                        else:
                            c = c.decode().replace(old_layer_digest,
                                                   new_layer_digest).encode()

                    replace_or_append_file_to_layer("{}/{}".format(
                        new_layer_digest,
                        os.path.basename(f.name)), c, s)

            #
            # Root .json file with the global info
            #
            elif "repositories" in f.name:
                # NOTE(review): read_file_from_image is declared with a str
                # path but is handed the TarInfo *f* here — this works
                # because extractfile() accepts either; confirm intent.
                c = read_file_from_image(img, f, autoclose=False)
                j = json.loads(c.decode())

                image = list(j.keys())[0]
                tag = list(j[image].keys())[0]

                # Update the latest layer
                j[image][tag] = new_layer_digest

                new_c = json.dumps(j).encode()

                replace_or_append_file_to_layer(f.name, new_c, s)

            elif ".json" in f.name and "/" not in f.name:
                c = read_file_from_image(img, f, autoclose=False)

                # Modify the JSON content to add the new
                # hash
                if json_metadata_root:
                    j = json_metadata_root
                else:
                    j = json.loads(c.decode())

                j["rootfs"]["diff_ids"][-1] = \
                    "sha256:{}".format(new_layer_digest)

                new_c = json.dumps(j).encode()

                replace_or_append_file_to_layer(f.name, new_c, s)

            # Add the rest of files / dirs
            else:
                s.addfile(f, img.extractfile(f))