Code example #1
File: context.py Project: c0ns0le/zenoss-4
    def writeDataFile( self, filename, text, content_type, subdir=None ):

        """ See IExportContext.
        """
        if subdir is not None:
            filename = '/'.join( ( subdir, filename ) )

        parents = filename.split('/')[:-1]
        while parents:
            path = '/'.join(parents) + '/'
            if path not in self._archive.getnames():
                info = TarInfo(path)
                info.type = DIRTYPE
                # tarfile.filemode(0755) == '-rwxr-xr-x'
                info.mode = 0755
                info.mtime = time.time()
                self._archive.addfile(info)
            parents.pop()

        info = TarInfo(filename)
        if isinstance(text, basestring):
            stream = StringIO(text)
            info.size = len(text)
        else:
            # Assume text is an instance of a class like
            # Products.Archetypes.WebDAVSupport.PdataStreamIterator,
            # as in the case of ATFile
            stream = text.file
            info.size = text.size
        info.mtime = time.time()
        self._archive.addfile( info, stream )
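A minimal, self-contained sketch of the pattern used in example #1 (and in the near-identical #2, #3 and #43 below): emit an explicit directory member for every missing parent, then add the payload itself from an in-memory buffer. This assumes Python 3; the archive contents and file names are illustrative only.

import time
from io import BytesIO
from tarfile import DIRTYPE, TarFile, TarInfo

def write_data_file(archive, filename, data, subdir=None):
    """Add data (bytes) to archive as filename, creating parent dir entries."""
    if subdir is not None:
        filename = '/'.join((subdir, filename))

    # Emit a directory member for each parent that is not in the archive yet.
    parents = filename.split('/')[:-1]
    for depth in range(1, len(parents) + 1):
        path = '/'.join(parents[:depth]) + '/'
        if path not in archive.getnames():
            info = TarInfo(path)
            info.type = DIRTYPE
            info.mode = 0o755
            info.mtime = time.time()
            archive.addfile(info)

    # Add the file itself from an in-memory buffer.
    info = TarInfo(filename)
    info.size = len(data)
    info.mtime = time.time()
    archive.addfile(info, BytesIO(data))

buf = BytesIO()
with TarFile(fileobj=buf, mode='w') as tar:
    write_data_file(tar, 'types.xml', b'<object/>', subdir='structure/portal_types')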
Code example #2
File: context.py Project: pigaov10/plone4.3
    def writeDataFile( self, filename, text, content_type, subdir=None ):

        """ See IExportContext.
        """
        if subdir is not None:
            filename = '/'.join( ( subdir, filename ) )

        parents = filename.split('/')[:-1]
        while parents:
            path = '/'.join(parents) + '/'
            if path not in self._archive.getnames():
                info = TarInfo(path)
                info.type = DIRTYPE
                # tarfile.filemode(0755) == '-rwxr-xr-x'
                info.mode = 0755
                info.mtime = time.time()
                self._archive.addfile(info)
            parents.pop()

        info = TarInfo(filename)
        if isinstance(text, str):
            stream = StringIO(text)
            info.size = len(text)
        elif isinstance(text, unicode):
            raise ValueError("Unicode text is not supported, even if it only "
                             "contains ascii. Please encode your data. See "
                             "GS 1.7.0 changes for more")
        else:
            # Assume text is an instance of a class like
            # Products.Archetypes.WebDAVSupport.PdataStreamIterator,
            # as in the case of ATFile
            stream = text.file
            info.size = text.size
        info.mtime = time.time()
        self._archive.addfile( info, stream )
Code example #3
File: patches.py Project: kroman0/products
def writeDataFile( self, filename, text, content_type, subdir=None ):

    """ See IExportContext.
    """
    mod_time = time.time()
    if subdir is not None:
        elements = subdir.split('/')
        parents = filter(None, elements)
        while parents:
            dirname = os.path.join(*parents)
            try:
                self._archive.getmember(dirname+'/')
            except KeyError:
                info = TarInfo(dirname)
                info.size = 0
                info.mode = 509
                info.mtime = mod_time
                info.type = DIRTYPE
                self._archive.addfile(info, StringIO())
            parents = parents[:-1]

        filename = '/'.join( ( subdir, filename ) )

    stream = StringIO( text )
    info = TarInfo( filename )
    info.size = len( text )
    info.mode = 436
    info.mtime = mod_time
    self._archive.addfile( info, stream )
Code example #4
File: tar2xml.py Project: plison/OpenSubtitles2015
def _addToArchive(output, filename, archive):
    output.seek(0)
    xmlInfo = TarInfo(filename)
    xmlInfo.size = len(output.getbuffer())
    xmlInfo.mtime = time.time()
    archive.addfile(xmlInfo,output)    
    output.close()
Code example #5
File: __init__.py Project: nilp0inter/oniontree
    def put(self, content, filename="", file_hash=""):
        """ Store file information in hashed tree """ 

        if not filename and not file_hash:
            raise ValueError('Filename or FileHash is mandatory')

        if filename:
            # File accessibility
            if not os.path.exists(filename):
                raise IOError('Inaccessible file %s' % filename)

            # Calc hash
            file_hash = self._get_hash(filename)
        
        if not file_hash:
            raise ValueError('Hash of file is mandatory')

        # Get file path for hash
        path, tarfile, hashed_filename = self._get_path(file_hash)

        # Create file path
        try:
            os.makedirs(path)
        except WindowsError: 
            pass
        except OSError:
            pass

        # Open tarfile
        if self.external_compressor:
            # External compressor is not suited for adding files.
            raise ValueError('You cannot use an external compressor to write files')

        with TarFile.open(name=os.path.join(path, tarfile), mode='a') as tar:
            with FileLock(os.path.join(path, tarfile)) as lock:
                # Test if file already exists into tarfile
                try:
                    tar.getmember(hashed_filename)
                    raise ValueError('Member already exists')
                except KeyError:
                    pass
                except:
                    raise
            
                data = self.encoder.encode(content)
                if self.internal_compressor:
                    data = self.internal_compressor.compress(data)

                data_file = StringIO(data)

                mtime = time.time() 
                ti = TarInfo(hashed_filename)
                ti.size = data_file.len
                ti.mtime = mtime 

                tar.addfile(tarinfo=ti, fileobj=data_file)

                tar.close()

        return file_hash
Code example #6
File: archive.py Project: davinirjr/django-archive
    def _dump_files(self, tar):
        """
        Dump all uploaded media to the archive.
        """

        # Loop through all models and find FileFields
        for model in apps.get_models():

            # Get the name of all file fields in the model
            field_names = []
            for field in model._meta.fields:
                if isinstance(field, models.FileField):
                    field_names.append(field.name)

            # If any were found, loop through each row
            if len(field_names):
                for row in model.objects.all():
                    for field_name in field_names:
                        field = getattr(row, field_name)
                        if field:
                            field.open()
                            info = TarInfo(field.name)
                            info.size = field.size
                            tar.addfile(info, field)
                            field.close()
Code example #7
    def move_certs(self, paths):
        self.log.info("Staging internal ssl certs for %s", self._log_name)
        yield self.pull_image(self.move_certs_image)
        # create the volume
        volume_name = self.format_volume_name(self.certs_volume_name, self)
        # create volume passes even if it already exists
        self.log.info("Creating ssl volume %s for %s", volume_name, self._log_name)
        yield self.docker('create_volume', volume_name)

        # create a tar archive of the internal cert files
        # docker.put_archive takes a tarfile and a running container
        # and unpacks the archive into the container
        nb_paths = {}
        tar_buf = BytesIO()
        archive = TarFile(fileobj=tar_buf, mode='w')
        for key, hub_path in paths.items():
            fname = os.path.basename(hub_path)
            nb_paths[key] = '/certs/' + fname
            with open(hub_path, 'rb') as f:
                content = f.read()
            tarinfo = TarInfo(name=fname)
            tarinfo.size = len(content)
            tarinfo.mtime = os.stat(hub_path).st_mtime
            tarinfo.mode = 0o644
            archive.addfile(tarinfo, BytesIO(content))
        archive.close()
        tar_buf.seek(0)

        # run a container to stage the certs,
        # mounting the volume at /certs/
        host_config = self.client.create_host_config(
            binds={
                volume_name: {"bind": "/certs", "mode": "rw"},
            },
        )
        container = yield self.docker('create_container',
            self.move_certs_image,
            volumes=["/certs"],
            host_config=host_config,
        )

        container_id = container['Id']
        self.log.debug(
            "Container %s is creating ssl certs for %s",
            container_id[:12], self._log_name,
        )
        # start the container
        yield self.docker('start', container_id)
        # stage the archive to the container
        try:
            yield self.docker(
                'put_archive',
                container=container_id,
                path='/certs',
                data=tar_buf,
            )
        finally:
            yield self.docker('remove_container', container_id)
        return nb_paths
Code example #8
File: onos_stage.py Project: K-OpenNet/ONOS-SSM
def addFile(tar, dest, file, file_size):
    if dest not in written_files:
        info = TarInfo(dest)
        info.size = file_size
        info.mtime = now
        info.mode = 0777
        tar.addfile(info, fileobj=file)
        written_files.add(dest)
Code example #9
File: test_revision.py Project: thepwagner/flotilla
 def generate_tar(entries):
     tar_buf = BytesIO()
     tar_file = TarFile(mode="w", fileobj=tar_buf)
     for path, contents in entries.items():
         tar_info = TarInfo(name=path)
         tar_info.size = len(contents)
         tar_file.addfile(tar_info, fileobj=BytesIO(contents))
     return BytesIO(tar_buf.getvalue())
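Example #9 reads tar_buf back without closing the TarFile; tarfile only writes the two end-of-archive zero blocks when the archive is closed, so a variant that closes it first (here via a context manager) produces a fully terminated stream. A small sketch assuming Python 3; the entries passed in are illustrative.

from io import BytesIO
from tarfile import TarFile, TarInfo

def generate_tar(entries):
    tar_buf = BytesIO()
    # Leaving the with-block closes the TarFile and writes the
    # terminating zero blocks before the buffer is read back.
    with TarFile(mode="w", fileobj=tar_buf) as tar_file:
        for path, contents in entries.items():
            tar_info = TarInfo(name=path)
            tar_info.size = len(contents)
            tar_file.addfile(tar_info, fileobj=BytesIO(contents))
    return BytesIO(tar_buf.getvalue())

archive = generate_tar({"service.yaml": b"image: nginx\n"})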
Code example #10
File: archive.py Project: davinirjr/django-archive
 def _dump_meta(self, tar):
     """
     Dump metadata to the archive.
     """
     data = MixedIO()
     dump({'version': __version__}, data)
     info = TarInfo('meta.json')
     info.size = data.rewind()
     tar.addfile(info, data)
Code example #11
File: backup.py Project: qij3/2buntu-Django-Blog
 def handle(self, *args, **kwargs):
     """
     Process the command.
     """
     tar = TarFile.open(datetime.today().strftime("2buntu-backup-%Y-%m-%d-%H-%M-%S.tar.bz2"), "w:bz2")
     for name in self.DATABASE_MODELS:
         f = MixedIO()
         call_command("dumpdata", name, format="json", stdout=f)
         info = TarInfo("%s.json" % name.split(".")[1])
         info.size = f.rewind()
         tar.addfile(info, f)
     for model in self.IMAGE_MODELS:
         for item in model.objects.all():
             if item.image:
                 info = TarInfo(item.image.name)
                 info.size = item.image.size
                 tar.addfile(info, item.image)
     self.stdout.write("Backup completed.")
Code example #12
File: onos_stage.py Project: K-OpenNet/ONOS-SSM
def addString(tar, dest, string):
    if dest not in written_files:
        print dest, string
        info = TarInfo(dest)
        info.size = len(string)
        info.mtime = now
        info.mode = 0777
        file = StringIO(string)
        tar.addfile(info, fileobj=file)
        file.close()
        written_files.add(dest)
Code example #13
File: test_sync.py Project: OPWEN/opwen-webapp
    def given_download(self, payload: Dict[str, bytes], compression: str):
        resource_id = '{}.tar.{}'.format(uuid4(), compression)
        download_filename = join(self._content_root, resource_id)

        with self.sync._open(download_filename, 'w') as archive:
            for filename, content in payload.items():
                tarinfo = TarInfo(filename)
                tarinfo.size = len(content)
                archive.addfile(tarinfo, BytesIO(content))

        self.email_server_client_mock.download.return_value = resource_id
Code example #14
File: test_context.py Project: goschtl/zope
 def _addMember(path, data, modtime):
     from tarfile import DIRTYPE
     elements = path.split('/')
     parents = filter(None, [elements[x] for x in range(len(elements))])
     for parent in parents:
         info = TarInfo()
         info.name = parent
         info.size = 0
         info.mtime = mod_time
         info.type = DIRTYPE
         archive.addfile(info, StringIO())
     _addOneMember(path, data, modtime)
Code example #15
    def create_file_from_string(self, filename, content):
        """Create a file with the contents passed as a string.

        :param filename: the path to put the file at inside the
            tarfile.
        :param content: the content to put in the created file.
        """
        tarinfo = TarInfo(name=filename)
        tarinfo.size = len(content)
        self._set_defaults(tarinfo)
        fileobj = StringIO(content)
        self.addfile(tarinfo, fileobj=fileobj)
Code example #16
File: archive.py Project: Adnn/django-archive
    def _dump_db(self, tar):
        """
        Dump the rows in each model to the archive.
        """

        # Dump the tables to a MixedIO
        data = MixedIO()
        call_command('dumpdata', all=True, format='json', indent=self.attr.get('ARCHIVE_DB_INDENT'),
                                 exclude=self.attr.get('ARCHIVE_EXCLUDE'), stdout=data)
        info = TarInfo(DB_DUMP)
        info.size = data.rewind()
        tar.addfile(info, data)
Code example #17
File: utils.py Project: llord1/DataScienceRecipes
def write_lines_to_tarball(tar_ball, tar_info, lines):
    """
    Writes the relevant lines to the tar ball
    """

    txt = '\n'.join(lines)
    txt = txt.encode('utf-8')
    with BytesIO(txt) as tar_file:
        info = TarInfo(name=tar_info.name)
        info.size = len(txt)
        tar_ball.addfile(info, fileobj=tar_file)
    pass
Code example #18
File: context.py Project: bendavis78/zope
    def writeDataFile( self, filename, text, content_type, subdir=None ):

        """ See IExportContext.
        """
        if subdir is not None:
            filename = '/'.join( ( subdir, filename ) )

        stream = StringIO( text )
        info = TarInfo( filename )
        info.size = len( text )
        info.mtime = time.time()
        self._archive.addfile( info, stream )
Code example #19
    def writeDataFile( self, filename, text, content_type, subdir=None ):

        """ See IExportContext.
        """
        if subdir is not None:
            filename = '/'.join( ( subdir, filename ) )

        stream = StringIO( text )
        info = TarInfo( filename )
        info.size = len( text )
        info.mtime = time.time()
        self._archive.addfile( info, stream )
Code example #20
File: test_context.py Project: bendavis78/zope
 def _addMember(path, data, modtime):
     from tarfile import DIRTYPE
     elements = path.split('/')
     parents = filter(None, [elements[x] for x in range(len(elements))])
     for parent in parents:
         info = TarInfo()
         info.name = parent
         info.size = 0
         info.mtime = mod_time
         info.type = DIRTYPE
         archive.addfile(info, StringIO())
     _addOneMember(path, data, modtime)
Code example #21
    def create_file_from_string(self, filename, content):
        """Create a file with the contents passed as a string.

        :param filename: the path to put the file at inside the
            tarfile.
        :param content: the content to put in the created file.
        """
        tarinfo = TarInfo(name=filename)
        tarinfo.size = len(content)
        self._set_defaults(tarinfo)
        fileobj = StringIO(content)
        self.addfile(tarinfo, fileobj=fileobj)
Code example #22
def _tar_file(items):
    """Helper to create an in-memory tar file with multiple files."""
    tar_fileobj = BytesIO()
    tf = TarFile.open(mode="w|", fileobj=tar_fileobj)
    for item_name, item_bytes in items:
        ti = TarInfo(name=item_name)
        content_as_bytes = item_bytes.encode("utf-8")
        ti.size = len(content_as_bytes)
        tf.addfile(ti, BytesIO(content_as_bytes))
    tf.close()
    tar_fileobj.seek(0)
    return tar_fileobj
Code example #23
File: forge_client.py Project: zghzdxs/veles
 def write_package():
     tbs = ForgeClient.UPLOAD_TAR_BUFFER_SIZE
     with TarFile.open(mode="w|gz", fileobj=body, bufsize=tbs,
                       dereference=True) as tar:
         for file in files:
             self.debug("Sending %s", file)
             ti = TarInfo(file)
             fp = os.path.join(self.path, file)
             ti.size = os.path.getsize(fp)
             ti.mode = 0o666
             with open(fp, "rb") as fd:
                 tar.addfile(ti, fileobj=fd)
     body.close()
Code example #24
File: csvw.py Project: GSS-Cogs/gss-utils
def run_ics(group: str,
            turtle: bytes,
            extra_files: List[str] = (),
            extra_data: List[bytes] = ()):
    client = docker.from_env()
    files = ['data.ttl']
    if len(extra_files) > 0:
        files.extend(extra_files)
    if len(extra_data) > 0:
        files.extend(f"extra_{i}.ttl" for i in range(0, len(extra_data)))
    tests = client.containers.create(
        'gsscogs/gdp-sparql-tests',
        command=f'''sparql-test-runner -t /usr/local/tests/{group} -m 10 '''
        f'''{" ".join('/tmp/' + f for f in files)}''')
    archive = BytesIO()
    with TarFile(fileobj=archive, mode='w') as t:
        ttl = TarInfo('data.ttl')
        ttl.size = len(turtle)
        ttl.mtime = time.time()
        t.addfile(ttl, BytesIO(turtle))
        for filename in extra_files:
            actual_path = Path('features') / 'fixtures' / 'extra' / filename
            with actual_path.open('rb') as actual_file:
                extra_file = t.gettarinfo(arcname=filename,
                                          fileobj=actual_file)
                t.addfile(extra_file, actual_file)
        for i, add_turtle in enumerate(extra_data):
            filename = f'extra_{i}.ttl'
            add_ttl = TarInfo(filename)
            add_ttl.size = len(add_turtle)
            add_ttl.mtime = time.time()
            t.addfile(add_ttl, BytesIO(add_turtle))
    archive.seek(0)
    tests.put_archive('/tmp/', archive)
    tests.start()
    response = tests.wait()
    sys.stdout.write(tests.logs().decode('utf-8'))
    return response['StatusCode']
Code example #25
def uploadDF(dataflowName):
    dataflowStr = None
    udfs = {}
    dataflowPath = os.path.join(path, "dataflows", dataflowName)
    with open(os.path.join(dataflowPath, "dataflowInfo.json"), 'r') as df:
        dataflowStr = df.read()

    if os.path.exists(dataflowPath + "/udfs/"):
        for udf in os.listdir(os.path.join(dataflowPath, "udfs")):
            with open(os.path.join(dataflowPath, "udfs", udf), 'r') as udfFile:
                udfs[udf] = udfFile.read()

    retinaBuf = io.BytesIO()
    with tarfile.open(fileobj=retinaBuf, mode="w:gz") as tar:
        info = TarInfo("dataflowInfo.json")
        info.size = len(dataflowStr)
        tar.addfile(info, io.BytesIO(bytearray(dataflowStr, "utf-8")))

        # udfs directory
        if udfs:
            info = TarInfo("udfs")
            info.type = tarfile.DIRTYPE
            info.mode = 0o755
            tar.addfile(info)

            # Add each udf to the directory above
            for udfName, udfCode in udfs.items():
                info = TarInfo(name="udfs/" + udfName)
                info.size = len(udfCode)
                info.mode = 0o755
                tar.addfile(info, io.BytesIO(bytearray(udfCode, "utf-8")))

    try:
        retina.delete(dataflowName)
    except:
        print("Dataflow deletion failed!", dataflowName, availableRetinas)

    retina.add(dataflowName, retinaBuf.getvalue())
Code example #26
File: utils.py Project: vladyan18/madt
    def from_str(cls, filename, string):
        self = cls()

        b_str = string.encode('utf-8')

        info = TarInfo(filename)

        fileobj = io.BytesIO()
        info.size = fileobj.write(b_str)
        fileobj.seek(0)

        self.addfile(tarinfo=info, fileobj=fileobj)

        return self
Code example #27
def tarball_images(
    images: List[Image.Image],
    *,
    name: str = None,
    animated: bool = False,
    format: str = "png",
    extras: List[Tuple[str, BytesIO]],
) -> BytesIO:
    fp = BytesIO()
    tar = TarFile(mode="w", fileobj=fp)

    for idx, image in enumerate(images):
        f = BytesIO()
        if animated:
            image[0].save(f,
                          format,
                          append_images=image[1:],
                          save_all=True,
                          loop=0)
        else:
            image.save(f, format)

        f.seek(0)
        if name:
            info = TarInfo(f"{name}_{idx}.{format}")
        else:
            info = TarInfo(f"{idx}.{format}")
        info.size = len(f.getbuffer())
        tar.addfile(info, fileobj=f)

    for extra in extras:
        info = TarInfo(extra[0] or "_.txt")
        info.size = len(extra[1].getbuffer())
        tar.addfile(info, fileobj=extra[1])

    fp.seek(0)
    return fp
Code example #28
File: archive.py Project: Adnn/django-archive
 def _dump_meta(self, tar):
     """
     Dump metadata to the archive.
     """
     data = MixedIO()
     meta_dict = OrderedDict((
         ('version', __version__),
         ('db_file', DB_DUMP),
         ('media_folder', MEDIA_DIR),
         ('settings', self.attr.settings_dict()),
     ))
     dump(meta_dict, data, indent=2)
     info = TarInfo(META_DUMP)
     info.size = data.rewind()
     tar.addfile(info, data)
Code example #29
def generate_dataset(config):
    destination = config.absolute(File.COMPRESSED_DATASET)
    csv_lines = [REPO_LIST_HEADERS]
    for lang, ext in config.extensions.items():
        for pos in range(REPO_PER_LANG):
            full_name = f'lang_{ext}/repo_{pos:02}'
            csv_lines.append(REPO_LINE.format(full_name=full_name, lang=lang))

    csv_bytes = '\n'.join(csv_lines).encode()
    with TarFile.open(destination, 'w:gz') as tar_file:
        tar_info = TarInfo(DATASET_FILENAME)
        tar_info.size = len(csv_bytes)
        tar_file.addfile(tar_info, BytesIO(csv_bytes))

    return True, 200
Code example #30
File: archive.py Project: haruanm/django-archive
 def _dump_meta(self, tar):
     """
     Dump metadata to the archive.
     """
     data = MixedIO()
     meta_dict = OrderedDict((
         ('version', __version__),
         ('db_file', DB_DUMP),
         ('media_folder', MEDIA_DIR),
         ('settings', self.attr.settings_dict()),
     ))
     dump(meta_dict, data, indent=2)
     info = TarInfo(META_DUMP)
     info.size = data.rewind()
     tar.addfile(info, data)
Code example #31
def run_csv2rdf(csv_filename: str, metadata_filename: str, csv_io: TextIO, metadata_io: TextIO):
    client = docker.from_env()
    csv2rdf = client.containers.create(
        'gsscogs/csv2rdf',
        command=f'csv2rdf -m annotated -o /tmp/output.ttl -t /tmp/{csv_filename} -u /tmp/{metadata_filename}'
    )
    archive = BytesIO()
    metadata_io.seek(0, SEEK_END)
    metadata_size = metadata_io.tell()
    metadata_io.seek(0)
    csv_io.seek(0, SEEK_END)
    csv_size = csv_io.tell()
    csv_io.seek(0)
    with TarFile(fileobj=archive, mode='w') as t:
        tis = TarInfo(str(metadata_filename))
        tis.size = metadata_size
        tis.mtime = time.time()
        t.addfile(tis, BytesIO(metadata_io.read().encode('utf-8')))
        tic = TarInfo(str(csv_filename))
        tic.size = csv_size
        tic.mtime = time.time()
        t.addfile(tic, BytesIO(csv_io.read().encode('utf-8')))
    archive.seek(0)
    csv2rdf.put_archive('/tmp/', archive)
    csv2rdf.start()
    response = csv2rdf.wait()
    sys.stdout.write(csv2rdf.logs().decode('utf-8'))
    assert_equal(response['StatusCode'], 0)
    output_stream, output_stat = csv2rdf.get_archive('/tmp/output.ttl')
    output_archive = BytesIO()
    for line in output_stream:
        output_archive.write(line)
    output_archive.seek(0)
    with TarFile(fileobj=output_archive, mode='r') as t:
        output_ttl = t.extractfile('output.ttl')
        return output_ttl.read()
Code example #32
File: archive.py Project: haruanm/django-archive
    def _dump_db(self, tar):
        """
        Dump the rows in each model to the archive.
        """

        # Dump the tables to a MixedIO
        data = MixedIO()
        call_command('dumpdata',
                     all=True,
                     format='json',
                     indent=self.attr.get('ARCHIVE_DB_INDENT'),
                     exclude=self.attr.get('ARCHIVE_EXCLUDE'),
                     stdout=data)
        info = TarInfo(DB_DUMP)
        info.size = data.rewind()
        tar.addfile(info, data)
Code example #33
def create_archive(filepaths):
    tarstream = BytesIO()
    tarfile = TarFile(fileobj=tarstream, mode='w')
    for filepath in filepaths:
        file = open(filepath, 'r')
        file_data = file.read()

        tarinfo = TarInfo(name=basename(file.name))
        tarinfo.size = len(file_data)
        tarinfo.mtime = time()

        tarfile.addfile(tarinfo, BytesIO(file_data))

    tarfile.close()
    tarstream.seek(0)
    return tarstream
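Example #33 opens each file in text mode, which only works on Python 2 where str is already bytes. A sketch of the same helper under the assumption of Python 3, reading in binary mode so that TarInfo.size matches the byte count handed to BytesIO:

from io import BytesIO
from os.path import basename
from tarfile import TarFile, TarInfo
from time import time

def create_archive(filepaths):
    tarstream = BytesIO()
    with TarFile(fileobj=tarstream, mode='w') as tar:
        for filepath in filepaths:
            # Binary mode keeps len(file_data) equal to the on-disk size.
            with open(filepath, 'rb') as f:
                file_data = f.read()
            tarinfo = TarInfo(name=basename(filepath))
            tarinfo.size = len(file_data)
            tarinfo.mtime = time()
            tar.addfile(tarinfo, BytesIO(file_data))
    tarstream.seek(0)
    return tarstream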
Code example #34
    def run(self, args, argv):
        # Create a temporary tarball with our whole build context and
        # dockerfile for the update
        tmp = tempfile.NamedTemporaryFile(suffix="dckr.tar.gz")
        tmp_tar = TarFile(fileobj=tmp, mode='w')

        # Add the executable to the tarball, using the current
        # configured binfmt_misc path. If we don't get a path then we
        # only need the support libraries copied
        ff, enabled = _check_binfmt_misc(args.executable)

        if not enabled:
            print("binfmt_misc not enabled, update disabled")
            return 1

        if ff:
            tmp_tar.add(args.executable, arcname=ff)

        # Add any associated libraries
        libs = _get_so_libs(args.executable)
        if libs:
            for l in libs:
                tmp_tar.add(os.path.realpath(l), arcname=l)

        # Create a Docker buildfile
        df = StringIO()
        df.write(u"FROM %s\n" % args.tag)
        df.write(u"ADD . /\n")

        df_bytes = BytesIO(bytes(df.getvalue(), "UTF-8"))

        df_tar = TarInfo(name="Dockerfile")
        df_tar.size = df_bytes.getbuffer().nbytes
        tmp_tar.addfile(df_tar, fileobj=df_bytes)

        tmp_tar.close()

        # reset the file pointers
        tmp.flush()
        tmp.seek(0)

        # Run the build with our tarball context
        dkr = Docker()
        dkr.update_image(args.tag, tmp, quiet=args.quiet)

        return 0
Code example #35
 def _add_entry(
     self,
     name: str,
     type: bytes,
     mode: int,
     mtime: int,
     size: int,
     data: Optional[IO[bytes]],
     linkname: str = "",
 ) -> None:
     info = TarInfo(name)
     info.type = type
     info.mode = mode
     info.size = size
     info.mtime = mtime
     info.linkname = linkname
     return self._inner.addfile(info, data)
Code example #36
File: docker.py Project: MaddTheSane/qemu
    def run(self, args, argv):
        # Create a temporary tarball with our whole build context and
        # dockerfile for the update
        tmp = tempfile.NamedTemporaryFile(suffix="dckr.tar.gz")
        tmp_tar = TarFile(fileobj=tmp, mode='w')

        # Add the executable to the tarball, using the current
        # configured binfmt_misc path. If we don't get a path then we
        # only need the support libraries copied
        ff, enabled = _check_binfmt_misc(args.executable)

        if not enabled:
            print("binfmt_misc not enabled, update disabled")
            return 1

        if ff:
            tmp_tar.add(args.executable, arcname=ff)

        # Add any associated libraries
        libs = _get_so_libs(args.executable)
        if libs:
            for l in libs:
                tmp_tar.add(os.path.realpath(l), arcname=l)

        # Create a Docker buildfile
        df = StringIO()
        df.write("FROM %s\n" % args.tag)
        df.write("ADD . /\n")
        df.seek(0)

        df_tar = TarInfo(name="Dockerfile")
        df_tar.size = len(df.buf)
        tmp_tar.addfile(df_tar, fileobj=df)

        tmp_tar.close()

        # reset the file pointers
        tmp.flush()
        tmp.seek(0)

        # Run the build with our tarball context
        dkr = Docker()
        dkr.update_image(args.tag, tmp, quiet=args.quiet)

        return 0
Code example #37
    def file_write(self,
                   path,
                   content,
                   mode=None,
                   owner=None,
                   group=None,
                   append=False,
                   hide=False,
                   sudo=False):
        """
        Writes a file to the container

        @param path: path of the file
        @param content: content to be put in the file
        @param mode: file mode
        @param owner: owner of the file
        @param group: group of the file
        @param append: append content to the file
        @param hide: hide (debug) logs
        @raise runtimeError: path for file couldn't be created
        """
        if append and self.exists(path):
            content = self.file_read(path) + content
        file_name = os.path.basename(path)
        dir_name = os.path.dirname(path)
        buf = BytesIO()
        with TarFile("write_file", mode='w', fileobj=buf) as tarf:
            f = BytesIO()
            length = f.write(content.encode('utf8'))
            f.seek(0)
            tari = TarInfo(name=file_name)
            tari.size = length
            if not mode is None:
                tari.mode = mode
            if not owner is None:
                tari.uname = owner
            if not group is None:
                tari.gname = group
            tarf.addfile(tari, f)
        if not self.exists(dir_name):
            result = self.container.exec_run("mkdir -p %s" % dir_name)
            if result.exit_code != 0:
                raise RuntimeError("Could not create path %s!\n%s" %
                                   (dir_name, result.output))
        self.container.put_archive(dir_name, buf.getvalue())
Code example #38
    def _dump_db(self, tar):
        """
        Dump the rows in each model to the archive.
        """

        # Determine the list of models to exclude
        exclude = getattr(settings, 'ARCHIVE_EXCLUDE', (
            'auth.Permission',
            'contenttypes.ContentType',
            'sessions.Session',
        ))

        # Dump the tables to a MixedIO
        data = MixedIO()
        call_command('dumpdata', all=True, format='json', exclude=exclude, stdout=data)
        info = TarInfo('data.json')
        info.size = data.rewind()
        tar.addfile(info, data)
Code example #39
    def _unpack_data(self, tar: TarFile, data_archive: TarFile):
        with io.BytesIO(
                str.encode("\n".join([
                    member.name.lstrip(".")
                    for member in data_archive if member.name.lstrip(".")
                ]) + "\n")) as fileobj:
            info = TarInfo("list")
            info.size = fileobj.getbuffer().nbytes
            self._unpack_info_file(tar, info, fileobj)

        names = tar.getnames()

        for member in (member for member in data_archive
                       if member.name not in names):
            if member.islnk() or member.issym() or member.isdir():
                tar.addfile(member)
            else:
                with data_archive.extractfile(member) as fileobj:
                    tar.addfile(member, fileobj)
Code example #40
    def stream_context(self):
        """Start streaming the tar context for Docker."""
        with TarFile.open(
            mode='w|',
            fileobj=getattr(sys.stdout, 'buffer', sys.stdout)
        ) as tarfile:

            tarfile.add(
                self.context,
                arcname='.',
                exclude=self.exclude
            )

            tarinfo = TarInfo('./Dockerfile')
            tarinfo.size = len(self.dockerfile)
            tarfile.addfile(tarinfo, BytesIO(self.dockerfile.encode('UTF-8')))

        tarfile.close()
        sys.stdout.flush()
Code example #41
File: tarball.py Project: seblu/installsystems
 def add_str(self, name, content, ftype, mode, mtime=None,
             uid=None, gid=None, uname=None, gname=None):
     '''
     Add a string in memory as a file in tarball
     '''
     if isinstance(name, unicode):
         name = name.encode("UTF-8")
     ti = TarInfo(name)
     # set tarinfo attribute
     for v in ("name", "ftype", "mode", "mtime", "uid", "gid", "uname", "gname"):
         if vars()[v] is not None:
             vars(ti)[v] = vars()[v]
     # set mtime to current if not specified
     if mtime is None:
         ti.mtime = int(time())
     # unicode char is encoded in UTF-8, has changelog must be in UTF-8
     if isinstance(content, unicode):
         content = content.encode("UTF-8")
     ti.size = len(content) if content is not None else 0
     self.addfile(ti, StringIO(content))
Code example #42
File: multitraverser.py Project: ableeb/WebOOT
    def tarfile(self, format, filename, content_type):
        from .root.histogram import Histogram
        from .combination import Combination
        imgformat = "eps"

        tarred_contents = StringIO()
        with closing(open_tar(mode="w" + format, fileobj=tarred_contents)) as tar:
            for key, context in self.resource_to_render.indexed_contexts:
                if not context_renderable_as(context, imgformat):
                    continue
                name = "/".join(map(str, key))
                content = context.rendered(imgformat).content.body

                info = TarInfo(name=name + "." + imgformat)
                info.size = len(content)
                tar.addfile(tarinfo=info, fileobj=StringIO(content))

        return Response(tarred_contents.getvalue(), content_type=content_type,
                        content_disposition=("Content-Disposition: attachment; filename={0};"
                                             .format(filename)))
Code example #43
    def writeDataFile(self, filename, text, content_type, subdir=None):
        """ See IExportContext.
        """
        if subdir is not None:
            filename = '/'.join((subdir, filename))

        parents = filename.split('/')[:-1]
        while parents:
            path = '/'.join(parents) + '/'
            if path not in self._archive.getnames():
                info = TarInfo(path)
                info.type = DIRTYPE
                info.mtime = time.time()
                self._archive.addfile(info)
            parents.pop()

        stream = StringIO(text)
        info = TarInfo(filename)
        info.size = len(text)
        info.mtime = time.time()
        self._archive.addfile(info, stream)
Code example #44
File: docker.py Project: terrytzq/xen-blktap3
    def run(self, args, argv):
        # Create a temporary tarball with our whole build context and
        # dockerfile for the update
        tmp = tempfile.NamedTemporaryFile(suffix="dckr.tar.gz")
        tmp_tar = TarFile(fileobj=tmp, mode='w')

        # Add the executable to the tarball
        bn = os.path.basename(args.executable)
        ff = "/usr/bin/%s" % bn
        tmp_tar.add(args.executable, arcname=ff)

        # Add any associated libraries
        libs = _get_so_libs(args.executable)
        if libs:
            for l in libs:
                tmp_tar.add(os.path.realpath(l), arcname=l)

        # Create a Docker buildfile
        df = StringIO()
        df.write("FROM %s\n" % args.tag)
        df.write("ADD . /\n")
        df.seek(0)

        df_tar = TarInfo(name="Dockerfile")
        df_tar.size = len(df.buf)
        tmp_tar.addfile(df_tar, fileobj=df)

        tmp_tar.close()

        # reset the file pointers
        tmp.flush()
        tmp.seek(0)

        # Run the build with our tarball context
        dkr = Docker()
        dkr.update_image(args.tag, tmp, quiet=args.quiet)

        return 0
Code example #45
    def archivestream(self, ticket):
        stream = cStringIO.StringIO()

        with tarfile.open(mode='w|', fileobj=stream):
            for filepath, arcpath, cont_name, cont_id, f_size, f_modified in ticket[
                    'target']:
                tarinfo = TarInfo()
                tarinfo.name = arcpath.lstrip('/')
                tarinfo.size = f_size
                tarinfo.mtime = datetime_to_epoch(f_modified)
                tarinfo_buf = tarinfo.tobuf()
                signed_url = None
                try:
                    signed_url = files.get_signed_url(filepath, config.fs)
                except fs.errors.ResourceNotFound:
                    pass

                if signed_url:
                    content_generator = self.stream_file_signed_url(
                        signed_url, tarinfo_buf,
                        (filepath, cont_name, cont_id, arcpath))
                else:
                    content_generator = self.stream_regular_file(
                        filepath, tarinfo_buf,
                        (filepath, cont_name, cont_id, arcpath))

                for chunk in content_generator:
                    yield chunk

                self.log_user_access(
                    AccessType.download_file,
                    cont_name=cont_name,
                    cont_id=cont_id,
                    filename=os.path.basename(arcpath),
                    origin_override=ticket['origin'],
                    download_ticket=ticket['_id'])  # log download
        yield stream.getvalue()  # get tar stream trailer
        stream.close()
Code example #46
File: multitraverser.py Project: ableeb/WebOOT
    def tarfile(self, format, filename, content_type):
        from .root.histogram import Histogram
        from .combination import Combination
        imgformat = "eps"

        tarred_contents = StringIO()
        with closing(open_tar(mode="w" + format,
                              fileobj=tarred_contents)) as tar:
            for key, context in self.resource_to_render.indexed_contexts:
                if not context_renderable_as(context, imgformat):
                    continue
                name = "/".join(map(str, key))
                content = context.rendered(imgformat).content.body

                info = TarInfo(name=name + "." + imgformat)
                info.size = len(content)
                tar.addfile(tarinfo=info, fileobj=StringIO(content))

        return Response(
            tarred_contents.getvalue(),
            content_type=content_type,
            content_disposition=(
                "Content-Disposition: attachment; filename={0};".format(
                    filename)))
Code example #47
File: fruitbak.py Project: wsldankers/fruitbak
def tar(host, backup, share, path):
	binary_stdout = stdout.buffer

	fbak = Fruitbak(confdir = Path('/dev/shm/conf'))
	backup = fbak[host][backup]
	if path is None:
		share, path = backup.locate_path(share)
	else:
		share = backup[share]

	def iterator():
		for dentry in share.find(path):
			if dentry.is_file and not dentry.is_hardlink:
				yield from dentry.hashes

	with fbak.pool.agent().readahead(iterator()) as reader:
		for dentry in share.find(path):
			name = dentry.name or b'.'
			i = TarInfo(fsdecode(bytes(name)))
			i.mode = dentry.mode & 0o7777
			i.uid = dentry.uid
			i.gid = dentry.gid
			i.mtime = dentry.mtime // 1000000000
			if dentry.is_hardlink:
				i.type = LNKTYPE
				hardlink = dentry.hardlink or b'.'
				i.linkname = fsdecode(bytes(hardlink))
			elif dentry.is_file:
				i.type = REGTYPE
				i.size = dentry.size
			elif dentry.is_symlink:
				i.type = SYMTYPE
				i.linkname = fsdecode(bytes(dentry.symlink))
			elif dentry.is_chardev:
				i.type = CHRTYPE
				i.devmajor = dentry.major
				i.devminor = dentry.minor
			elif dentry.is_blockdev:
				i.type = BLKTYPE
				i.devmajor = dentry.major
				i.devminor = dentry.minor
			elif dentry.is_directory:
				i.type = DIRTYPE
			elif dentry.is_fifo:
				i.type = FIFOTYPE
			else:
				continue

			binary_stdout.write(i.tobuf(GNU_FORMAT))

			if dentry.is_file and not dentry.is_hardlink:
				for hash in dentry.hashes:
					action = next(reader)
					if action.exception:
						raise action.exception[1]
					binary_stdout.write(action.value)
				padding = -i.size % BLOCKSIZE
				if padding:
					binary_stdout.write(bytes(padding))

	binary_stdout.write(b'\0' * (BLOCKSIZE*2))
Code example #48
def update():
    logging.info('Backup update started.')
    global args
    # Compare archive contents
    # dir_path, basename = path.split(abspath)
    # ext = ' '
    # while len(ext) > 0:
    #     basename, ext = path.splitext(basename)
    lst_name = path.join(dir_path, basename + '.lst.gz')
    try:
        logging.info("Collect backed up files info")
        backed = {}
        if not path.exists(lst_name):
            with TarFile.open(args.dst,
                              'r',
                              ignore_zeros=True,
                              errorlevel=0,
                              encoding='mbcs',
                              errors='utf-8') as arc:
                try:
                    member = arc.next()
                    while member is not None:
                        try:
                            # if sys.version_info[0] > 2:
                            #     fn = member.name + u''
                            # else:
                            fn = member.name.decode('cp1251', errors='replace')
                            backed[fn] = member
                            with gzip.open(lst_name, 'a') as f:
                                f.write(fn.encode('utf8', errors='replace'))
                                # if sys.version_info[0] > 2:
                                #     f.write(bytes('\t' + str(member.mtime) + '\t' + str(member.size) + '\n', 'utf8'))
                                # else:
                                f.write('\t' + str(member.mtime) + '\t' +
                                        str(member.size) + '\n')
                        except UnicodeEncodeError as e:
                            logging.warning('UnicodeEncodeError: ' + str(e),
                                            exc_info=True)
                        member = arc.next()
                except IOError as e:
                    logging.warning('IOError: ' + str(e), exc_info=True)

            n = 1
            incpath = abspath.replace(basename, basename + '_inc%s' % n)
            while path.exists(incpath):
                with TarFile.open(incpath,
                                  'r',
                                  ignore_zeros=True,
                                  errorlevel=0,
                                  encoding='mbcs',
                                  errors='utf-8') as arc:
                    try:
                        member = arc.next()
                        while member is not None:
                            try:
                                # if sys.version_info[0] > 2:
                                #     fn = member.name
                                # else:
                                fn = member.name.decode('utf8',
                                                        errors='replace')
                                if fn not in backed:
                                    backed[fn] = member
                                    with gzip.open(lst_name, 'a') as f:
                                        f.write(
                                            fn.encode('utf8',
                                                      errors='replace'))
                                        # if sys.version_info[0] > 2:
                                        #     f.write(bytes('\t' + str(member.mtime) + '\t' + str(member.size) + '\n',
                                        #                   'utf8'))
                                        # else:
                                        f.write('\t' + str(member.mtime) +
                                                '\t' + str(member.size) + '\n')
                            except UnicodeEncodeError as e:
                                logging.warning('UnicodeEncodeError: ' +
                                                str(e),
                                                exc_info=True)
                            member = arc.next()
                    except IOError as e:
                        logging.warning('IOError: ' + str(e), exc_info=True)

                n += 1
                incpath = abspath.replace(basename, basename + '_inc%s' % n)
        else:
            with gzip.open(lst_name, 'r') as f:
                for line in f:
                    fn = b''
                    mtime = fsize = 0
                    v = line.split(b'\t')
                    if len(v) > 2:
                        fn, mtime, fsize = v
                        fsize = fsize.replace(b'\r', b'')
                        fsize = fsize.replace(b'\n', b'')
                    else:
                        fn, mtime = v
                    mtime = mtime.replace(b'\r', b'')
                    mtime = mtime.replace(b'\n', b'')
                    fn = unicode(fn, 'utf8')
                    info = TarInfo(fn)
                    info.mtime = int(mtime)
                    info.size = int(fsize)
                    backed[fn] = info

        n = 1
        incpath = abspath.replace(basename, basename + '_inc%s' % n)
        while path.exists(incpath):
            if path.getsize(incpath) < 2048:
                try:
                    with TarFile.open(incpath,
                                      'r',
                                      ignore_zeros=True,
                                      errorlevel=0,
                                      encoding='mbcs',
                                      errors='utf-8') as arc:
                        try:
                            member = arc.next()
                            if member is None:
                                arc.close()
                                logging.warning(incpath +
                                                ' is empty. Removing.')
                                remove(incpath)
                                break
                        except IOError as e:
                            logging.warning('IOError: ' + str(e),
                                            exc_info=True)
                except TarError as e:
                    logging.warning('TarError: ' + str(e) + '. Removing ' +
                                    incpath + '.')
                    remove(incpath)
                    break
            n += 1
            incpath = abspath.replace(basename, basename + '_inc%s' % n)
        updatedlst = path.join(dir_path, basename + updated_postfix)
        if path.exists(updatedlst):
            remove(updatedlst)

        exception_thrown = False
        added_count = 0
        with TarFile.open(incpath, 'w:gz', ignore_zeros=True, encoding='mbcs', errors='utf-8') as arc,\
            gzip.open(lst_name, 'a') as lst:
            for dirpath, dirnames, filenames in walk(args.src):
                if exception_thrown:
                    break
                for filename in filenames:
                    fn = path.join(dirpath, filename)
                    op = ' added'
                    key = fn[3:].replace('\\', '/')
                    if key in backed:
                        if backed[key].mtime < int(path.getmtime(fn)):
                            op = ' updated'
                            backed.pop(key)
                        else:
                            logging.debug(fn[3:] + ' up to date')
                            backed.pop(key)
                            continue
                    try:
                        arc.add(fn)
                        added_count += 1
                        lst.write(key.encode('utf8', errors='replace'))
                        # if sys.version_info[0] > 2:
                        #     lst.write(bytes('\t' + str(int(path.getmtime(fn))) + '\t' + str(path.getsize(fn)) + '\n',
                        #                     'utf8'))
                        # else:
                        lst.write(
                            '\t' + str(int(path.getmtime(fn))) + '\t' +
                            str(path.getsize(fn)) + '\n', )
                        if op == ' updated':
                            with gzip.open(updatedlst, 'a') as u:
                                u.write(
                                    key.encode('utf8', errors='replace') +
                                    b'\n')
                        logging.info(fn[3:] + op)
                    except CompressionError as e:
                        logging.warning(fn + ': CompressionError: ' + str(e),
                                        exc_info=True)
                    except StreamError as e:
                        logging.warning(fn + ': StreamError: ' + str(e),
                                        exc_info=True)
                    except UnicodeEncodeError as e:
                        logging.warning(fn + ': UnicodeEncodeError: ' + str(e),
                                        exc_info=True)
                    except IOError as e:
                        logging.warning(fn + ': IOError: ' + str(e),
                                        exc_info=True)
                        exception_thrown = True
                        break
                    except Exception as e:
                        logging.warning(fn + ': Exception: ' + str(e),
                                        exc_info=True)

        # Remove backup file if no files added
        if not added_count:
            remove(incpath)

        if len(backed) > 0:
            with gzip.open(path.join(dir_path, basename + delete_postfix),
                           'w') as dl:
                for k in backed.keys():
                    try:
                        dl.write(k.encode('utf8', errors='replace'))
                        dl.write(b'\n')
                    except Exception as e:
                        logging.warning(str(e), exc_info=True)
        if exception_thrown:
            return exception_thrown
        else:
            logging.info("Done.")
            return exception_thrown
    except ReadError:
        create()
Code example #49
def get_artifact_file(artifact_file_name: str):
    artifact_file_path = get_artifact_file_path(artifact_file_name)
    artifact_file = open(artifact_file_path, mode='r+b')
    artifact_file_info = TarInfo(artifact_file_name)
    artifact_file_info.size = get_file_size(artifact_file)
    return artifact_file_info, artifact_file
Code example #50
def generate_biom_and_metadata_release(study_status='public'):
    """Generate a list of biom/meatadata filepaths and a tgz of those files

    Parameters
    ----------
    study_status : str, optional
        The study status to search for. Note that this should always be set
        to 'public' but having this exposed helps with testing. The other
        options are 'private' and 'sandbox'
    """
    studies = qdb.study.Study.get_by_status(study_status)
    qiita_config = ConfigurationManager()
    working_dir = qiita_config.working_dir
    portal = qiita_config.portal
    bdir = qdb.util.get_db_files_base_dir()
    time = datetime.now().strftime('%m-%d-%y %H:%M:%S')

    data = []
    for s in studies:
        # [0] latest is first, [1] only getting the filepath
        sample_fp = relpath(s.sample_template.get_filepaths()[0][1], bdir)

        for a in s.artifacts(artifact_type='BIOM'):
            if a.processing_parameters is None or a.visibility != study_status:
                continue

            merging_schemes, parent_softwares = a.merging_scheme
            software = a.processing_parameters.command.software
            software = '%s v%s' % (software.name, software.version)

            for x in a.filepaths:
                if x['fp_type'] != 'biom' or 'only-16s' in x['fp']:
                    continue
                fp = relpath(x['fp'], bdir)
                for pt in a.prep_templates:
                    categories = pt.categories()
                    platform = ''
                    target_gene = ''
                    if 'platform' in categories:
                        platform = ', '.join(
                            set(pt.get_category('platform').values()))
                    if 'target_gene' in categories:
                        target_gene = ', '.join(
                            set(pt.get_category('target_gene').values()))
                    for _, prep_fp in pt.get_filepaths():
                        if 'qiime' not in prep_fp:
                            break
                    prep_fp = relpath(prep_fp, bdir)
                    # format: (biom_fp, sample_fp, prep_fp, qiita_artifact_id,
                    #          platform, target gene, merging schemes,
                    #          artifact software/version,
                    #          parent software/version)
                    data.append(
                        (fp, sample_fp, prep_fp, a.id, platform, target_gene,
                         merging_schemes, software, parent_softwares))

    # writing text and tgz file
    ts = datetime.now().strftime('%m%d%y-%H%M%S')
    tgz_dir = join(working_dir, 'releases')
    create_nested_path(tgz_dir)
    tgz_name = join(tgz_dir, '%s-%s-building.tgz' % (portal, study_status))
    tgz_name_final = join(tgz_dir, '%s-%s.tgz' % (portal, study_status))
    txt_lines = [
        "biom fp\tsample fp\tprep fp\tqiita artifact id\tplatform\t"
        "target gene\tmerging scheme\tartifact software\tparent software"
    ]
    with topen(tgz_name, "w|gz") as tgz:
        for biom_fp, sample_fp, prep_fp, aid, pform, tg, ms, asv, psv in data:
            txt_lines.append(
                "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" %
                (biom_fp, sample_fp, prep_fp, aid, pform, tg, ms, asv, psv))
            tgz.add(join(bdir, biom_fp), arcname=biom_fp, recursive=False)
            tgz.add(join(bdir, sample_fp), arcname=sample_fp, recursive=False)
            tgz.add(join(bdir, prep_fp), arcname=prep_fp, recursive=False)
        info = TarInfo(name='%s-%s-%s.txt' % (portal, study_status, ts))
        txt_hd = BytesIO()
        txt_hd.write(bytes('\n'.join(txt_lines), 'ascii'))
        txt_hd.seek(0)
        info.size = len(txt_hd.read())
        txt_hd.seek(0)
        tgz.addfile(tarinfo=info, fileobj=txt_hd)

    with open(tgz_name, "rb") as f:
        md5sum = md5()
        for c in iter(lambda: f.read(4096), b""):
            md5sum.update(c)

    rename(tgz_name, tgz_name_final)

    vals = [('filepath', tgz_name_final[len(working_dir):], r_client.set),
            ('md5sum', md5sum.hexdigest(), r_client.set),
            ('time', time, r_client.set)]
    for k, v, f in vals:
        redis_key = '%s:release:%s:%s' % (portal, study_status, k)
        # important to "flush" variables to avoid errors
        r_client.delete(redis_key)
        f(redis_key, v)
Code example #51
File: shownet.py Project: lxmunc/CADLab
    def plot_predictions(self):
        epoch, batch, data = self.get_next_batch(train=False)  # get a test batch
        num_classes = self.test_data_provider.get_num_classes()
        NUM_ROWS = 2
        NUM_COLS = 4
        NUM_IMGS = NUM_ROWS * NUM_COLS if not self.save_preds else data[0].shape[1]
        NUM_TOP_CLASSES = min(num_classes, 5)  # show this many top labels
        NUM_OUTPUTS = self.model_state["layers"][self.softmax_name]["outputs"]
        PRED_IDX = 1

        label_names = [lab.split(",")[0] for lab in self.test_data_provider.batch_meta["label_names"]]
        if self.only_errors:
            preds = n.zeros((data[0].shape[1], NUM_OUTPUTS), dtype=n.single)
        else:
            preds = n.zeros((NUM_IMGS, NUM_OUTPUTS), dtype=n.single)
            # rand_idx = nr.permutation(n.r_[n.arange(1), n.where(data[1] == 552)[1], n.where(data[1] == 795)[1], n.where(data[1] == 449)[1], n.where(data[1] == 274)[1]])[:NUM_IMGS]
            rand_idx = nr.randint(0, data[0].shape[1], NUM_IMGS)
            if NUM_IMGS < data[0].shape[1]:
                data = [n.require(d[:, rand_idx], requirements="C") for d in data]
        #        data += [preds]
        # Run the model
        print [d.shape for d in data], preds.shape
        self.libmodel.startFeatureWriter(data, [preds], [self.softmax_name])
        IGPUModel.finish_batch(self)
        print preds
        data[0] = self.test_data_provider.get_plottable_data(data[0])

        if self.save_preds:
            if not gfile.Exists(self.save_preds):
                gfile.MakeDirs(self.save_preds)
            preds_thresh = preds > 0.5  # Binarize predictions
            data[0] = data[0] * 255.0
            data[0][data[0] < 0] = 0
            data[0][data[0] > 255] = 255
            data[0] = n.require(data[0], dtype=n.uint8)
            dir_name = "%s_predictions_batch_%d" % (os.path.basename(self.save_file), batch)
            tar_name = os.path.join(self.save_preds, "%s.tar" % dir_name)
            tfo = gfile.GFile(tar_name, "w")
            tf = TarFile(fileobj=tfo, mode="w")
            for img_idx in xrange(NUM_IMGS):
                img = data[0][img_idx, :, :, :]
                imsave = Image.fromarray(img)
                prefix = (
                    "CORRECT"
                    if data[1][0, img_idx] == preds_thresh[img_idx, PRED_IDX]
                    else "FALSE_POS"
                    if preds_thresh[img_idx, PRED_IDX] == 1
                    else "FALSE_NEG"
                )
                file_name = "%s_%.2f_%d_%05d_%d.png" % (
                    prefix,
                    preds[img_idx, PRED_IDX],
                    batch,
                    img_idx,
                    data[1][0, img_idx],
                )
                #                gf = gfile.GFile(file_name, "w")
                file_string = StringIO()
                imsave.save(file_string, "PNG")
                tarinf = TarInfo(os.path.join(dir_name, file_name))
                tarinf.size = file_string.tell()
                file_string.seek(0)
                tf.addfile(tarinf, file_string)
            tf.close()
            tfo.close()
            #                gf.close()
            print "Wrote %d prediction PNGs to %s" % (preds.shape[0], tar_name)
        else:
            fig = pl.figure(3, figsize=(12, 9))
            fig.text(0.4, 0.95, "%s test samples" % ("Mistaken" if self.only_errors else "Random"))
            if self.only_errors:
                # what the net got wrong
                if NUM_OUTPUTS > 1:
                    err_idx = [i for i, p in enumerate(preds.argmax(axis=1)) if p not in n.where(data[2][:, i] > 0)[0]]
                else:
                    err_idx = n.where(data[1][0, :] != preds[:, 0].T)[0]
                    print err_idx
                err_idx = r.sample(err_idx, min(len(err_idx), NUM_IMGS))
                data[0], data[1], preds = data[0][:, err_idx], data[1][:, err_idx], preds[err_idx, :]

            import matplotlib.gridspec as gridspec
            import matplotlib.colors as colors

            cconv = colors.ColorConverter()
            gs = gridspec.GridSpec(NUM_ROWS * 2, NUM_COLS, width_ratios=[1] * NUM_COLS, height_ratios=[2, 1] * NUM_ROWS)
            # print data[1]
            for row in xrange(NUM_ROWS):
                for col in xrange(NUM_COLS):
                    img_idx = row * NUM_COLS + col
                    if data[0].shape[0] <= img_idx:
                        break
                    pl.subplot(gs[(row * 2) * NUM_COLS + col])
                    # pl.subplot(NUM_ROWS*2, NUM_COLS, row * 2 * NUM_COLS + col + 1)
                    pl.xticks([])
                    pl.yticks([])
                    img = data[0][img_idx, :, :, :]
                    img = img.squeeze()
                    if len(img.shape) > 2:  # more than 2 dimensions
                        if img.shape[2] == 2:  # if two channels (compare with ==, not identity)
                            # copy 2nd to 3rd channel for visualization
                            a1 = img
                            a2 = img[:, :, 1]
                            a2 = a2[:, :, n.newaxis]
                            img = n.concatenate((a1, a2), axis=2)
                        pl.imshow(img, interpolation="lanczos")
                    else:
                        pl.imshow(img, interpolation="lanczos", cmap=pl.gray())
                    show_title = data[1].shape[0] == 1
                    true_label = [int(data[1][0, img_idx])] if show_title else n.where(data[1][:, img_idx] == 1)[0]
                    # print true_label
                    # print preds[img_idx,:].shape
                    # print preds[img_idx,:].max()
                    true_label_names = [label_names[i] for i in true_label]
                    img_labels = sorted(zip(preds[img_idx, :], label_names), key=lambda x: x[0])[-NUM_TOP_CLASSES:]
                    # print img_labels
                    axes = pl.subplot(gs[(row * 2 + 1) * NUM_COLS + col])
                    height = 0.5
                    ylocs = n.array(range(NUM_TOP_CLASSES)) * height
                    pl.barh(
                        ylocs,
                        [l[0] for l in img_labels],
                        height=height,
                        color=["#ffaaaa" if l[1] in true_label_names else "#aaaaff" for l in img_labels],
                    )
                    # pl.title(", ".join(true_labels))
                    if show_title:
                        pl.title(", ".join(true_label_names), fontsize=15, fontweight="bold")
                    else:
                        print true_label_names
                    pl.yticks(
                        ylocs + height / 2,
                        [l[1] for l in img_labels],
                        x=1,
                        backgroundcolor=cconv.to_rgba("0.65", alpha=0.5),
                        weight="bold",
                    )
                    for line in enumerate(axes.get_yticklines()):
                        line[1].set_visible(False)
                    # pl.xticks([width], [''])
                    # pl.yticks([])
                    pl.xticks([])
                    pl.ylim(0, ylocs[-1] + height)
                    pl.xlim(0, 1)
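
The save_preds branch above streams each PIL image into an in-memory buffer and uses the buffer position as the TarInfo size. A minimal Python 3 sketch of the same pattern, assuming illustrative names and an iterable of uint8 arrays (the original code is Python 2):

import io
import tarfile
from PIL import Image

def save_images_to_tar(tar_path, images, dir_name="predictions"):
    # images: iterable of uint8 HxW or HxWxC arrays.
    with tarfile.open(tar_path, mode="w") as tf:
        for idx, img in enumerate(images):
            buf = io.BytesIO()
            Image.fromarray(img).save(buf, "PNG")
            info = tarfile.TarInfo("%s/%06d.png" % (dir_name, idx))
            info.size = buf.tell()      # bytes written by save()
            buf.seek(0)                 # rewind before addfile()
            tf.addfile(info, buf)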
コード例 #52
0
ファイル: meta_util.py プロジェクト: antgonza/qiita
def generate_biom_and_metadata_release(study_status='public'):
    """Generate a list of biom/meatadata filepaths and a tgz of those files

    Parameters
    ----------
    study_status : str, optional
        The study status to search for. Note that this should always be set
        to 'public' but having this exposed helps with testing. The other
        options are 'private' and 'sandbox'
    """
    studies = qdb.study.Study.get_by_status(study_status)
    qiita_config = ConfigurationManager()
    working_dir = qiita_config.working_dir
    portal = qiita_config.portal
    bdir = qdb.util.get_db_files_base_dir()
    time = datetime.now().strftime('%m-%d-%y %H:%M:%S')

    data = []
    for s in studies:
        # [0] latest is first, [1] only getting the filepath
        sample_fp = relpath(s.sample_template.get_filepaths()[0][1], bdir)

        for a in s.artifacts(artifact_type='BIOM'):
            if a.processing_parameters is None or a.visibility != study_status:
                continue

            merging_schemes, parent_softwares = a.merging_scheme
            software = a.processing_parameters.command.software
            software = '%s v%s' % (software.name, software.version)

            for x in a.filepaths:
                if x['fp_type'] != 'biom' or 'only-16s' in x['fp']:
                    continue
                fp = relpath(x['fp'], bdir)
                for pt in a.prep_templates:
                    categories = pt.categories()
                    platform = ''
                    target_gene = ''
                    if 'platform' in categories:
                        platform = ', '.join(
                            set(pt.get_category('platform').values()))
                    if 'target_gene' in categories:
                        target_gene = ', '.join(
                            set(pt.get_category('target_gene').values()))
                    for _, prep_fp in pt.get_filepaths():
                        if 'qiime' not in prep_fp:
                            break
                    prep_fp = relpath(prep_fp, bdir)
                    # format: (biom_fp, sample_fp, prep_fp, qiita_artifact_id,
                    #          platform, target gene, merging schemes,
                    #          artifact software/version,
                    #          parent software/version)
                    data.append((fp, sample_fp, prep_fp, a.id, platform,
                                 target_gene, merging_schemes, software,
                                 parent_softwares))

    # writing text and tgz file
    ts = datetime.now().strftime('%m%d%y-%H%M%S')
    tgz_dir = join(working_dir, 'releases')
    create_nested_path(tgz_dir)
    tgz_name = join(tgz_dir, '%s-%s-building.tgz' % (portal, study_status))
    tgz_name_final = join(tgz_dir, '%s-%s.tgz' % (portal, study_status))
    txt_lines = [
        "biom fp\tsample fp\tprep fp\tqiita artifact id\tplatform\t"
        "target gene\tmerging scheme\tartifact software\tparent software"]
    with topen(tgz_name, "w|gz") as tgz:
        for biom_fp, sample_fp, prep_fp, aid, pform, tg, ms, asv, psv in data:
            txt_lines.append("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (
                biom_fp, sample_fp, prep_fp, aid, pform, tg, ms, asv, psv))
            tgz.add(join(bdir, biom_fp), arcname=biom_fp, recursive=False)
            tgz.add(join(bdir, sample_fp), arcname=sample_fp, recursive=False)
            tgz.add(join(bdir, prep_fp), arcname=prep_fp, recursive=False)
        info = TarInfo(name='%s-%s-%s.txt' % (portal, study_status, ts))
        txt_hd = BytesIO()
        txt_hd.write(bytes('\n'.join(txt_lines), 'ascii'))
        txt_hd.seek(0)
        info.size = len(txt_hd.read())
        txt_hd.seek(0)
        tgz.addfile(tarinfo=info, fileobj=txt_hd)

    with open(tgz_name, "rb") as f:
        md5sum = md5()
        for c in iter(lambda: f.read(4096), b""):
            md5sum.update(c)

    rename(tgz_name, tgz_name_final)

    vals = [
        ('filepath', tgz_name_final[len(working_dir):], r_client.set),
        ('md5sum', md5sum.hexdigest(), r_client.set),
        ('time', time, r_client.set)]
    for k, v, f in vals:
        redis_key = '%s:release:%s:%s' % (portal, study_status, k)
        # important to "flush" variables to avoid errors
        r_client.delete(redis_key)
        f(redis_key, v)
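
Before the rename() publishes the archive, it can be worth re-opening it and recomputing the chunked MD5. A small sketch under the assumption of a standalone helper (verify_release is not part of qiita):

from hashlib import md5
from tarfile import open as topen

def verify_release(tgz_path):
    # List the members, then recompute the chunked MD5 over the raw file.
    with topen(tgz_path, "r:gz") as tgz:
        members = tgz.getnames()
    checksum = md5()
    with open(tgz_path, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            checksum.update(chunk)
    return members, checksum.hexdigest()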
コード例 #53
0
ファイル: files.py プロジェクト: johnnoone/aiodocker
def make_dockerfile(obj):
    """Transform obj to a docker tar.
    """

    if isinstance(obj, TarReader):
        return obj

    if isinstance(obj, str) and os.path.isfile(obj):
        archive = None
        ext = os.path.splitext(obj)[1]  # splitext() returns (root, ext); keep the extension only
        if ext == '.tar':
            encoding = None
            archive = True
        elif ext in ('.tgz', '.gz'):
            encoding = 'gzip'
            archive = True
        elif ext in ('.tbz', '.tbz2', '.tb2', '.bz2'):
            encoding = 'bz2'
            archive = True
        elif ext in ('.tz', '.Z'):
            encoding = 'compress'
            archive = True
        elif ext in ('.tlz', '.lz', '.lzma'):
            encoding = 'lzma'
            archive = True
        elif ext in ('.txz', '.xz'):  # lzma & lzma2
            encoding = 'xz'
            archive = True

        with open(obj, 'rb') as file:
            if archive:
                return TarReader(file, encoding)

            # No recognised archive extension: try it as a tar first,
            # otherwise treat the contents as a single Dockerfile
            try:
                obj = TarFile.open(fileobj=file)
            except TarError:
                obj = io.BytesIO(file.read())

    if isinstance(obj, str) and os.path.isdir(obj):
        raise NotImplementedError('Currently not implemented')

    if isinstance(obj, str):
        raise ValueError('%r is not a Dockerfile' % obj)

    if isinstance(obj, io.StringIO):
        obj = io.BytesIO(obj.getvalue().encode('utf-8'))

    if isinstance(obj, io.BytesIO):
        out = io.BytesIO()
        info = TarInfo('Dockerfile')
        info.size = len(obj.getvalue())
        tar = TarFile.open(fileobj=out, mode='w')
        tar.addfile(info, obj)
        tar.close()
        obj = tar

    if isinstance(obj, str) and os.path.isdir(obj):
        # it's a docker context, Make a tar and compress it
        tar = TarFile.open(fileobj=NamedTemporaryFile(), mode='w:gz')
        tar.add(obj, arcname='.')
        tar.close()
        obj = tar.fileobj

    if isinstance(obj, TarFile):
        obj.close()
        obj = obj.fileobj

    if isinstance(obj, gzip.GzipFile):
        return TarReader(obj, 'gzip')

    if isinstance(obj, bz2.BZ2File):
        return TarReader(obj, 'bz2')

    if isinstance(obj, lzma.LZMAFile):
        return TarReader(obj, 'xz')

    return TarReader(obj)
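
The io.BytesIO branch above is the core of the function: wrap a lone Dockerfile in a one-member, in-memory tar. A standalone sketch of that step, assuming the helper name dockerfile_to_tar (not part of aiodocker):

import io
from tarfile import TarFile, TarInfo

def dockerfile_to_tar(dockerfile_text):
    # One-member tar built entirely in memory, suitable for streaming to a build API.
    data = dockerfile_text.encode('utf-8')
    out = io.BytesIO()
    with TarFile.open(fileobj=out, mode='w') as tar:
        info = TarInfo('Dockerfile')
        info.size = len(data)
        tar.addfile(info, io.BytesIO(data))
    out.seek(0)
    return out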
コード例 #54
0
ファイル: meta_util.py プロジェクト: ElDeveloper/qiita
def generate_biom_and_metadata_release(study_status='public'):
    """Generate a list of biom/meatadata filepaths and a tgz of those files

    Parameters
    ----------
    study_status : str, optional
        The study status to search for. Note that this should always be set
        to 'public' but having this exposed helps with testing. The other
        options are 'private' and 'sandbox'
    """
    studies = qdb.study.Study.get_by_status(study_status)
    qiita_config = ConfigurationManager()
    working_dir = qiita_config.working_dir
    portal = qiita_config.portal
    bdir = qdb.util.get_db_files_base_dir()
    time = datetime.now().strftime('%m-%d-%y %H:%M:%S')

    data = []
    for s in studies:
        # [0] latest is first, [1] only getting the filepath
        sample_fp = relpath(s.sample_template.get_filepaths()[0][1], bdir)

        for a in s.artifacts(artifact_type='BIOM'):
            if a.processing_parameters is None:
                continue

            cmd_name = a.processing_parameters.command.name

            # this loop is necessary as in theory an artifact can be
            # generated from multiple prep info files
            human_cmd = []
            for p in a.parents:
                pp = p.processing_parameters
                pp_cmd_name = pp.command.name
                if pp_cmd_name == 'Trimming':
                    human_cmd.append('%s @ %s' % (
                        cmd_name, str(pp.values['length'])))
                else:
                    human_cmd.append('%s, %s' % (cmd_name, pp_cmd_name))
            human_cmd = ', '.join(human_cmd)

            for _, fp, fp_type in a.filepaths:
                if fp_type != 'biom' or 'only-16s' in fp:
                    continue
                fp = relpath(fp, bdir)
                # format: (biom_fp, sample_fp, prep_fp, qiita_artifact_id,
                #          human readable name)
                for pt in a.prep_templates:
                    for _, prep_fp in pt.get_filepaths():
                        if 'qiime' not in prep_fp:
                            break
                    prep_fp = relpath(prep_fp, bdir)
                    data.append((fp, sample_fp, prep_fp, a.id, human_cmd))

    # writing text and tgz file
    ts = datetime.now().strftime('%m%d%y-%H%M%S')
    tgz_dir = join(working_dir, 'releases')
    if not exists(tgz_dir):
        makedirs(tgz_dir)
    tgz_name = join(tgz_dir, '%s-%s-building.tgz' % (portal, study_status))
    tgz_name_final = join(tgz_dir, '%s-%s.tgz' % (portal, study_status))
    txt_hd = StringIO()
    with topen(tgz_name, "w|gz") as tgz:
        # writing header for txt
        txt_hd.write(
            "biom_fp\tsample_fp\tprep_fp\tqiita_artifact_id\tcommand\n")
        for biom_fp, sample_fp, prep_fp, artifact_id, human_cmd in data:
            txt_hd.write("%s\t%s\t%s\t%s\t%s\n" % (
                biom_fp, sample_fp, prep_fp, artifact_id, human_cmd))
            tgz.add(join(bdir, biom_fp), arcname=biom_fp, recursive=False)
            tgz.add(join(bdir, sample_fp), arcname=sample_fp, recursive=False)
            tgz.add(join(bdir, prep_fp), arcname=prep_fp, recursive=False)

        txt_hd.seek(0)
        info = TarInfo(name='%s-%s-%s.txt' % (portal, study_status, ts))
        info.size = len(txt_hd.buf)
        tgz.addfile(tarinfo=info, fileobj=txt_hd)

    with open(tgz_name, "rb") as f:
        md5sum = md5()
        for c in iter(lambda: f.read(4096), b""):
            md5sum.update(c)

    rename(tgz_name, tgz_name_final)

    vals = [
        ('filepath', tgz_name_final[len(working_dir):], r_client.set),
        ('md5sum', md5sum.hexdigest(), r_client.set),
        ('time', time, r_client.set)]
    for k, v, f in vals:
        redis_key = '%s:release:%s:%s' % (portal, study_status, k)
        # important to "flush" variables to avoid errors
        r_client.delete(redis_key)
        f(redis_key, v)
コード例 #55
0
ファイル: test_context.py プロジェクト: pigaov10/plone4.3
    def _addOneMember(path, data, modtime):
        stream = StringIO(data)
        info = TarInfo(path)
        info.size = len(data)
        info.mtime = modtime
        archive.addfile(info, stream)
コード例 #56
0
ファイル: backup.py プロジェクト: newtoncorp/oio-sds
    def compute(self, conn, data=None):
        tarinfo = TarInfo()
        tarinfo.name = self.name
        tarinfo.mode = 0o700
        tarinfo.uid = 0
        tarinfo.gid = 0
        tarinfo.type = REGTYPE
        tarinfo.linkname = ""

        if self.name == CONTAINER_PROPERTIES:
            meta = data or conn.container_get_properties(self.acct, self.ref)
            tarinfo.size = len(json.dumps(meta['properties'], sort_keys=True))
            self._filesize = tarinfo.size
            self._buf = tarinfo.tobuf(format=PAX_FORMAT)
            return
        elif self.name == CONTAINER_MANIFEST:
            tarinfo.size = len(json.dumps(data, sort_keys=True))
            self._filesize = tarinfo.size
            self._buf = tarinfo.tobuf(format=PAX_FORMAT)
            return

        entry = conn.object_get_properties(self.acct, self.ref, self.name)

        properties = entry['properties']

        # x-static-large-object
        if properties.get(SLO, False):
            tarinfo.size = int(properties.get(SLO_SIZE))
            _, slo = conn.object_fetch(self.acct,
                                       self.ref,
                                       self.name,
                                       properties=False)
            self._slo = json.loads("".join(slo), object_pairs_hook=OrderedDict)
            self._checksums = {}
            # format MD5 entries to share the same layout as a multi-chunk object
            offset = 0
            for idx, ck in enumerate(self._slo):
                self._checksums[idx] = {
                    'hash': ck['hash'].upper(),
                    'size': ck['bytes'],
                    'offset': offset
                }
                offset += ck['bytes']
        else:
            tarinfo.size = int(entry['length'])
            meta, chunks = conn.object_locate(self.acct,
                                              self.ref,
                                              self.name,
                                              properties=False)
            storage_method = STORAGE_METHODS.load(meta['chunk_method'])
            chunks = _sort_chunks(chunks, storage_method.ec)
            for idx in chunks:
                chunks[idx] = chunks[idx][0]
                del chunks[idx]['url']
                del chunks[idx]['score']
                del chunks[idx]['pos']
            self._checksums = chunks
        self._filesize = tarinfo.size

        # XATTR
        # do we have to store basic properties like policy, ... ?
        for key, val in properties.items():
            assert isinstance(val, basestring), \
                "Invalid type for %s:%s:%s" % (self.acct, self.name, key)
            if self.slo and key in SLO_HEADERS:
                continue
            tarinfo.pax_headers[SCHILY + key] = val
        tarinfo.pax_headers['mime_type'] = entry['mime_type']
        self._buf = tarinfo.tobuf(format=PAX_FORMAT)
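
compute() above assembles the PAX extended header by hand with tobuf(). When the standard library does the writing, the same per-member attributes can be supplied through TarInfo.pax_headers, roughly as in this sketch (the helper name and the xattr key prefix are assumptions, mirroring the SCHILY convention used above):

import io
import tarfile

def add_member_with_xattrs(tar_path, name, payload, xattrs):
    # payload: bytes; xattrs: dict of str -> str, emitted as PAX extended headers.
    with tarfile.open(tar_path, mode='w', format=tarfile.PAX_FORMAT) as tar:
        info = tarfile.TarInfo(name)
        info.size = len(payload)
        info.mode = 0o700
        info.pax_headers = {'SCHILY.xattr.user.%s' % k: v for k, v in xattrs.items()}
        tar.addfile(info, io.BytesIO(payload))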
コード例 #57
0
ファイル: misc.py プロジェクト: OpenTTD/musa
def tar_add_file_from_string(tar, tar_path, filename, content):
	file = StringIO.StringIO(content)
	info = TarInfo(tar_join_path(tar_path, filename))
	info.size = len(content)
	tar.addfile(info, file)
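
The helper above is Python 2 (module-level StringIO, str content). On Python 3 a tar member must be bytes, so an equivalent would look roughly like this sketch (an assumed adaptation; tar_join_path is replaced by a plain path join):

import io
from tarfile import TarInfo

def tar_add_text_file(tar, tar_path, filename, content):
    # content is a str; tar members need bytes, so encode before wrapping in BytesIO.
    data = content.encode('utf-8')
    info = TarInfo(tar_path.rstrip('/') + '/' + filename)
    info.size = len(data)
    tar.addfile(info, io.BytesIO(data))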