Example #1
 def __init__(self, fd_or_path, mode="w"):
   if hasattr(fd_or_path, "write"):
     self.tar_fd = tarfile.open(mode=mode,
                                fileobj=fd_or_path,
                                encoding="utf-8")
   else:
     self.tar_fd = tarfile.open(name=fd_or_path, mode=mode, encoding="utf-8")
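Example #1 branches on whether it was given a writable file object or a filesystem path. A minimal standalone sketch of the same dispatch (the helper name and variables here are invented for illustration):

import io
import tarfile

def open_tar_for_writing(fd_or_path, mode="w"):
    # A file-like object goes in via fileobj=; anything else is treated as a path.
    if hasattr(fd_or_path, "write"):
        return tarfile.open(mode=mode, fileobj=fd_or_path)
    return tarfile.open(name=fd_or_path, mode=mode)

buf = io.BytesIO()
with open_tar_for_writing(buf) as tar:
    pass  # add members here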
Example #2
    def extract_images(self, overwrite=False):
        for setn in ('train', 'val'):
            img_dir = os.path.join(self.out_dir, setn)

            neon_logger.display("Extracting %s files" % (setn))
            toptar = getattr(self, setn + '_tar')
            label_dict = getattr(self, setn + '_labels')
            name_slice = slice(None, 9) if setn == 'train' else slice(15, -5)
            with tarfile.open(toptar) as tf:
                for s in tf.getmembers():
                    label = label_dict[s.name[name_slice]]
                    subpath = os.path.join(img_dir, str(label))
                    if not os.path.exists(subpath):
                        os.makedirs(subpath)
                    if setn == 'train':
                        tarfp = tarfile.open(fileobj=tf.extractfile(s))
                        file_list = tarfp.getmembers()
                    else:
                        tarfp = tf
                        file_list = [s]

                    for fobj in file_list:
                        fname = os.path.join(subpath, fobj.name)
                        if not os.path.exists(fname) or overwrite:
                            with open(fname, 'wb') as jf:
                                jf.write(tarfp.extractfile(fobj).read())
Example #3
def build_openssl(config, basedir):
    cfg = None
    for key in OPENSSL['build']:
        if fnmatch.fnmatch(config, key):
            cfg = key

    if not cfg:
        return

    dstdir   = os.path.join(basedir, config, 'openssl')
    location = download_file(OPENSSL['url'], basedir, OPENSSL['sha1'])
    relname  = os.path.basename(location)[:os.path.basename(location).index('.tar')]
    srcdir   = os.path.join(basedir, relname)

    def is_compiled():
        compiled = exists(os.path.join(dstdir, 'include', 'openssl', 'ssl.h'))
        for lib in OPENSSL['build'][cfg]['libs']:
            compiled = compiled and exists(os.path.join(dstdir, 'lib', lib))
        return compiled

    if not is_compiled():
        rmdir(srcdir)
        tarfile.open(location).extractall(basedir)
        os.chdir(srcdir)
        opts = OPENSSL['build'][cfg]
        shell('perl Configure --openssldir=%s %s' % (dstdir, opts['configure']))
        for cmd in opts['build']:
            shell(cmd)
        if not is_compiled():
            error("Unable to compile OpenSSL for your system, aborting.")

    return OPENSSL['build'][cfg]['os_libs']
Example #4
def maybe_download_and_extract(data_url, dest_dir='/tmp/imagenet'):
  """
  Download and extract a model tar file. If the pretrained model we're using doesn't
  already exist, this downloads it and unpacks it into a directory.
  :param data_url:  URL where the tar.gz file lives
  :param dest_dir:  destination directory to untar into
  :return:
  """
  if not os.path.exists(dest_dir):
    os.makedirs(dest_dir)
  filename = data_url.split('/')[-1]
  filepath = os.path.join(dest_dir, filename)
  if not os.path.exists(filepath):

    def _progress(count, block_size, total_size):
      sys.stdout.write('\r>> Downloading %s %.1f%%' %
                       (filename,
                        float(count * block_size) / float(total_size) * 100.0))
      sys.stdout.flush()

    filepath, _ = urllib.request.urlretrieve(data_url,
                                             filepath,
                                             _progress)
    print()
    statinfo = os.stat(filepath)
    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
    tarfile.open(filepath, 'r:gz').extractall(dest_dir)
Example #5
    def setUp(self):
        super(ImportTestCase, self).setUp()
        self.url = reverse_course_url('import_handler', self.course.id)
        self.content_dir = path(tempfile.mkdtemp())

        def touch(name):
            """ Equivalent to shell's 'touch'"""
            with file(name, 'a'):
                os.utime(name, None)

        # Create tar test files -----------------------------------------------
        # OK course:
        good_dir = tempfile.mkdtemp(dir=self.content_dir)
        os.makedirs(os.path.join(good_dir, "course"))
        with open(os.path.join(good_dir, "course.xml"), "w+") as f:
            f.write('<course url_name="2013_Spring" org="EDx" course="0.00x"/>')

        with open(os.path.join(good_dir, "course", "2013_Spring.xml"), "w+") as f:
            f.write('<course></course>')

        self.good_tar = os.path.join(self.content_dir, "good.tar.gz")
        with tarfile.open(self.good_tar, "w:gz") as gtar:
            gtar.add(good_dir)

        # Bad course (no 'course.xml' file):
        bad_dir = tempfile.mkdtemp(dir=self.content_dir)
        touch(os.path.join(bad_dir, "bad.xml"))
        self.bad_tar = os.path.join(self.content_dir, "bad.tar.gz")
        with tarfile.open(self.bad_tar, "w:gz") as btar:
            btar.add(bad_dir)

        self.unsafe_common_dir = path(tempfile.mkdtemp(dir=self.content_dir))
Example #6
def download_20newsgroups(target_dir, cache_path):
    """Download the 20 newsgroups data and store it as a zipped pickle."""
    archive_path = os.path.join(target_dir, ARCHIVE_NAME)
    train_path = os.path.join(target_dir, TRAIN_FOLDER)
    test_path = os.path.join(target_dir, TEST_FOLDER)

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    if os.path.exists(archive_path):
        # Download is not complete as the .tar.gz file is removed after
        # download.
        logger.warning("Download was incomplete, downloading again.")
        os.remove(archive_path)

    logger.warning("Downloading dataset from %s (14 MB)", URL)
    opener = urlopen(URL)
    with open(archive_path, 'wb') as f:
        f.write(opener.read())

    logger.info("Decompressing %s", archive_path)
    tarfile.open(archive_path, "r:gz").extractall(path=target_dir)
    os.remove(archive_path)

    # Store a zipped pickle
    cache = dict(train=load_files(train_path, encoding='latin1'),
                 test=load_files(test_path, encoding='latin1'))
    compressed_content = codecs.encode(pickle.dumps(cache), 'zlib_codec')
    with open(cache_path, 'wb') as f:
        f.write(compressed_content)

    shutil.rmtree(target_dir)
    return cache
Example #7
def maybe_download_and_extract():
  """Download and extract model tar file.

  If the pretrained model we're using doesn't already exist, this function
  downloads it from the TensorFlow.org website and unpacks it into a directory.
  """
  dest_directory = FLAGS.model_dir
  if not os.path.exists(dest_directory):
    os.makedirs(dest_directory)
  filename = DATA_URL.split('/')[-1]
  filepath = os.path.join(dest_directory, filename)
  if not os.path.exists(filepath):

    def _progress(count, block_size, total_size):
      sys.stdout.write('\r>> Downloading %s %.1f%%' %
                       (filename,
                        float(count * block_size) / float(total_size) * 100.0))
      sys.stdout.flush()

    filepath, _ = urllib.request.urlretrieve(DATA_URL,
                                             filepath,
                                             _progress)
    print()
    statinfo = os.stat(filepath)
    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
  tarfile.open(filepath, 'r:gz').extractall(dest_directory)
Example #8
def build_bundle(args):
    version = args.version
    temp_dir = tempfile.mkdtemp()
    start_dir = os.getcwd()

    try:
        os.putenv('CONDA_ENVS_PATH', temp_dir)

        # 1. Install OpenMDAO to a temporary conda environment
        # 2. Grab all packages
        # 3. Make tar file
        create_env(
            'openmdao-bundle',
            ['openmdao=={version}'.format(version=version)],
            channel='http://conda.binstar.org/openmdao',
            yes=True
        )

        os.chdir('{envs_path}/.pkgs'.format(envs_path=temp_dir))
        pkgs = glob.glob('*.tar.bz2')
        with tarfile.open('openmdao.tar', mode='w') as tar:
            for pkg in pkgs:
                tar.add(pkg, recursive=False)

        shutil.move(
            'openmdao.tar',
            '{start_dir}/openmdao.tar'.format(start_dir=start_dir)
        )

    finally:
        os.chdir(start_dir)
        os.unsetenv('CONDA_ENVS_PATH')
        shutil.rmtree(temp_dir)
Example #9
File: maf.py Project: pfi/maf
def unpack_maflib(directory):
    with _Cleaner(directory) as c:
        content = _read_archive(__file__)

        os.makedirs(os.path.join(directory, 'maflib'))
        os.chdir(directory)

        bz2_name = TAR_NAME + '.bz2'
        with open(bz2_name, 'wb') as f:
            f.write(content)

        try:
            t = tarfile.open(bz2_name)
        except:
            try:
                os.system('bunzip2 ' + bz2_name)
                t = tarfile.open(TAR_NAME)
            except:
                raise Exception('Cannot extract maflib. Check that python bz2 module or bunzip2 command is available.')

        try:
            t.extractall()
        finally:
            t.close()

        try:
            os.remove(bz2_name)
            os.remove(TAR_NAME)
        except:
            pass

        maflib_path = os.path.abspath(os.getcwd())
        return maflib_path
Example #10
def decompress(filename, out_dir='/tmp/decompressed'):
    """
    Given a tar.gz or a zip, extract the contents and return a list of files.
    If the out_dir already exists, we skip decompression and just return the
    files inside that dir. Otherwise it will be created from scratch and
    filled with the files from the compressed file.
    """
    if os.path.exists(out_dir):
        return glob.glob(os.path.join(out_dir, '*'))
    os.makedirs(out_dir)
    del_dir = False

    fn = filename #alias
    try:
        if zipfile.is_zipfile(fn):
            zipfile.ZipFile(fn, 'r').extractall(out_dir)
        elif tarfile.is_tarfile(fn):
            tarfile.open(fn, 'r').extractall(out_dir)
        else:
            raise ValueError('Invalid file type - must be tar.gz or zip')
    except Exception as e:
        del_dir = True #delete the partially created out_dir
        raise e #pass exception through
    finally:
        if del_dir:
            shutil.rmtree(out_dir)
    
    return [os.path.join(out_dir, f) for f in os.listdir(out_dir)]
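Example #10 picks the extractor by sniffing the file rather than trusting its extension. A minimal sketch of that dispatch, assuming only the standard library (function and argument names are made up):

import tarfile
import zipfile

def extract_any(archive_path, out_dir):
    # is_zipfile/is_tarfile inspect the file contents, so misleading names are handled.
    if zipfile.is_zipfile(archive_path):
        with zipfile.ZipFile(archive_path) as zf:
            zf.extractall(out_dir)
    elif tarfile.is_tarfile(archive_path):
        with tarfile.open(archive_path) as tf:
            tf.extractall(out_dir)
    else:
        raise ValueError('unsupported archive: %s' % archive_path)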
Example #11
def install_from_source(setuptools_source, pip_source):
    setuptools_temp_dir = tempfile.mkdtemp('-setuptools', 'ptvs-')
    pip_temp_dir = tempfile.mkdtemp('-pip', 'ptvs-')
    cwd = os.getcwd()

    try:
        os.chdir(setuptools_temp_dir)
        print('Downloading setuptools from ' + setuptools_source)
        sys.stdout.flush()
        setuptools_package, _ = urlretrieve(setuptools_source, 'setuptools.tar.gz')

        package = tarfile.open(setuptools_package)
        try:
            safe_members = [m for m in package.getmembers() if not m.name.startswith(('..', '\\'))]
            package.extractall(setuptools_temp_dir, members=safe_members)
        finally:
            package.close()

        extracted_dirs = [d for d in os.listdir(setuptools_temp_dir) if os.path.exists(os.path.join(d, 'setup.py'))]
        if not extracted_dirs:
            raise OSError("Failed to find setuptools's setup.py")
        extracted_dir = extracted_dirs[0]

        print('\nInstalling from ' + extracted_dir)
        sys.stdout.flush()
        os.chdir(extracted_dir)
        subprocess.check_call(
            EXECUTABLE + ['setup.py', 'install', '--single-version-externally-managed', '--record', 'setuptools.txt']
        )

        os.chdir(pip_temp_dir)
        print('Downloading pip from ' + pip_source)
        sys.stdout.flush()
        pip_package, _ = urlretrieve(pip_source, 'pip.tar.gz')

        package = tarfile.open(pip_package)
        try:
            safe_members = [m for m in package.getmembers() if not m.name.startswith(('..', '\\'))]
            package.extractall(pip_temp_dir, members=safe_members)
        finally:
            package.close()

        extracted_dirs = [d for d in os.listdir(pip_temp_dir) if os.path.exists(os.path.join(d, 'setup.py'))]
        if not extracted_dirs:
            raise OSError("Failed to find pip's setup.py")
        extracted_dir = extracted_dirs[0]

        print('\nInstalling from ' + extracted_dir)
        sys.stdout.flush()
        os.chdir(extracted_dir)
        subprocess.check_call(
            EXECUTABLE + ['setup.py', 'install', '--single-version-externally-managed', '--record', 'pip.txt']
        )

        print('\nInstallation Complete')
        sys.stdout.flush()
    finally:
        os.chdir(cwd)
        shutil.rmtree(setuptools_temp_dir, ignore_errors=True)
        shutil.rmtree(pip_temp_dir, ignore_errors=True)
Example #12
def consolidate_tarballs_job(job, fname_to_id):
    """
    Combine the contents of separate tarballs into one.
    Subdirs within the tarball will be named after the keys in fname_to_id

    :param JobFunctionWrappingJob job: passed automatically by Toil
    :param dict[str,str] fname_to_id: Dictionary of the form: file-name-prefix=FileStoreID
    :return: The file store ID of the generated tarball
    :rtype: str
    """
    work_dir = job.fileStore.getLocalTempDir()
    # Retrieve output file paths to consolidate
    tar_paths = []
    for fname, file_store_id in fname_to_id.iteritems():
        p = job.fileStore.readGlobalFile(file_store_id, os.path.join(work_dir, fname + '.tar.gz'))
        tar_paths.append((p, fname))
    # I/O
    # output_name is arbitrary as this job function returns a FileStoreId
    output_name = 'foo.tar.gz'
    out_tar = os.path.join(work_dir, output_name)
    # Consolidate separate tarballs into one
    with tarfile.open(os.path.join(work_dir, out_tar), 'w:gz') as f_out:
        for tar, fname in tar_paths:
            with tarfile.open(tar, 'r') as f_in:
                for tarinfo in f_in:
                    with closing(f_in.extractfile(tarinfo)) as f_in_file:
                        tarinfo.name = os.path.join(output_name, fname, os.path.basename(tarinfo.name))
                        f_out.addfile(tarinfo, fileobj=f_in_file)
    return job.fileStore.writeGlobalFile(out_tar)
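Example #12 streams members from the input tarballs straight into the combined archive with extractfile() plus addfile(). A rough self-contained sketch of that merge pattern (the function and file names are made up):

import tarfile
from contextlib import closing

def merge_tarballs(sources, out_path):
    # Copy every member of each source tar into one gzipped output tar,
    # reusing the original TarInfo metadata.
    with tarfile.open(out_path, 'w:gz') as out_tar:
        for src in sources:
            with tarfile.open(src, 'r') as in_tar:
                for member in in_tar:
                    if member.isfile():
                        with closing(in_tar.extractfile(member)) as fh:
                            out_tar.addfile(member, fileobj=fh)
                    else:
                        out_tar.addfile(member)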
Example #13
def unpack(filename, destination):
    dirname = ''
    print('Extracting {0}'.format(filename))
    if filename.endswith('tar.gz'):
        tfile = tarfile.open(filename, 'r:gz')
        tfile.extractall(destination)
        dirname = tfile.getnames()[0]
    elif filename.endswith('tar.bz2'):
        tfile = tarfile.open(filename, 'r:bz2')
        tfile.extractall(destination)
        dirname = tfile.getnames()[0]
    elif filename.endswith('zip'):
        zfile = zipfile.ZipFile(filename)
        zfile.extractall(destination)
        dirname = zfile.namelist()[0]
    else:
        raise NotImplementedError('Unsupported archive type')

    # a little trick to rename tool directories so they don't contain version number
    rename_candidate = re.match(r'^([a-z][^\-]*\-*)+', dirname)

    if rename_candidate is not None:
        rename_to = rename_candidate.group(0).encode('ascii').strip('-')
        if rename_to != dirname:
            print('Renaming {0} to {1}'.format(dirname, rename_to))
            if os.path.isdir(rename_to):
                shutil.rmtree(rename_to)
            shutil.move(dirname, rename_to)
Example #14
    def handle(self, dump_path, **options):
        if dump_path == "-":
            arc = tarfile.open(fileobj=sys.stdin, mode="r:gz")
        else:
            arc = tarfile.open(dump_path, mode="r:gz")
        base_path = tempfile.mkdtemp()
        arc.extractall(path=base_path)
        path = glob(os.path.join(base_path, "*"))[0]

        # media files
        # shutil.copytree(os.path.join(path, 'media'), settings.MEDIA_ROOT)
        dir_util.copy_tree(os.path.join(path, "media"), settings.MEDIA_ROOT)

        # load db fields
        old_stdout = sys.stdout
        sys.stdout = open(os.path.join(path, "backup_db_dump.json"), "w")
        call_command("dumpdata", indent=4)
        sys.stdout.close()
        sys.stdout = old_stdout
        call_command("flush", noinput=True, interactive=False)
        call_command("reset", "contenttypes", "auth", noinput=True, interactive=False)
        call_command("loaddata", os.path.join(path, "db_dump.json"))

        # rebase FilepathFields
        call_command("rebase_filepathfields", os.path.join(path, "fpf_bases_dump.json"))
Example #15
    def __call__(self, path, target):
        """Extract C{path} into C{target} using the C{zipfile} module.

        @note: No need to use C{tarfile.is_tarfile} because we want an
        exception on failure anyway."""
        import tarfile
        tarfile.open(path, 'r').extractall(target)
Example #16
def GetFileFromTar( path, member, rename ):
    print( 'Extracting %s from %s ...' % (rename, path) )
    tarfile.open(path, 'r:gz').extract(member)
    shutil.move(member, rename)
    tree_rmv = member.split('/')[0]
    shutil.rmtree(tree_rmv)
    return
Example #17
 def read(self, all_tags=False, self_provides=True, *extra_tags):
     arfile = ar.Ar(fh = self.__file)
     arfile.read()
     debbin = arfile.get_file('debian-binary')
     if debbin is None:
         raise DebError(self.__path, 'no debian binary')
     if debbin.read() != '2.0\n':
         raise DebError(self.__path, 'invalid debian binary format')
     control = arfile.get_file('control.tar.gz')
     if control is not None:
         # XXX: python2.4 relies on a name
         tar = tarfile.open(name='control.tar.gz', fileobj=control)
     else:
         control = arfile.get_file('control.tar.xz')
         if control is None:
             raise DebError(self.__path, 'missing control.tar')
         if not HAVE_LZMA:
             raise DebError(self.__path, 'can\'t open control.tar.xz without python-lzma')
         decompressed = lzma.decompress(control.read())
         tar = tarfile.open(name="control.tar.xz",
                            fileobj=StringIO.StringIO(decompressed))
     try:
         name = './control'
         # workaround for python2.4's tarfile module
         if 'control' in tar.getnames():
             name = 'control'
         control = tar.extractfile(name)
     except KeyError:
         raise DebError(self.__path,
                        'missing \'control\' file in control.tar')
     self.__parse_control(control, all_tags, self_provides, *extra_tags)
     return self
Example #18
def UnTarFile( tarFileName, targetPath, strip = 0 ):
    """ Untar the file tarFile to targetPath take off the the first strip folders."""
    global kCachePath, kInstallPath, kVerbose
    if strip == 0: # Can untar directly into target
        tarFile = tarfile.open( os.path.join( kCachePath, tarFileName ) )
        tarFile.extractall( targetPath )
        tarFile.close()
    else: # Must untar to temp then to target, note target cannot already exist!
        # First untar to a temp directory
        tempDirectory = os.path.join( kCachePath, "temp" )
        if os.path.exists( tempDirectory ): # Must be an empty temp directory
            shutil.rmtree( tempDirectory )
        tarFile = tarfile.open( os.path.join( kCachePath, tarFileName ) )
        tarFile.extractall( tempDirectory )
        tarFile.close()
        # Now choose how many components to strip
        copyDirectory = tempDirectory
        for iStrip in range( 0, strip ):
            subFolders = os.listdir( copyDirectory )
            if 'pax_global_header' in subFolders:
                subFolders.remove( 'pax_global_header' )
            copyDirectory = os.path.join( copyDirectory, subFolders[0] )
        # Now can copy, first make sure the targetPath does not exist
        if os.path.exists( targetPath ):
            shutil.rmtree( targetPath )
        # Now copy
        shutil.copytree( copyDirectory, targetPath )
        shutil.rmtree( tempDirectory )
    return "Extracted %s\n" % tarFileName
Example #19
    def __init__(self, pack):
        self.pack = Path(pack)

        self.tar = tarfile.open(str(self.pack), 'r:*')
        f = self.tar.extractfile('METADATA/version')
        version = f.read()
        f.close()
        if version.startswith(b'REPROZIP VERSION '):
            try:
                version = int(version[17:].rstrip())
            except ValueError:
                version = None
            if version in (1, 2):
                self.version = version
                self.data_prefix = PosixPath(b'DATA')
            else:
                raise ValueError(
                    "Unknown format version %r (maybe you should upgrade "
                    "reprounzip? I only know versions 1 and 2" % version)
        else:
            raise ValueError("File doesn't appear to be a RPZ pack")

        if self.version == 1:
            self.data = self.tar
        elif version == 2:
            self.data = tarfile.open(
                fileobj=self.tar.extractfile('DATA.tar.gz'),
                mode='r:*')
        else:
            assert False
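Example #19 opens a tar that is stored inside another tar by handing the inner member's file object to tarfile.open(fileobj=...). A minimal sketch of that nesting (the outer file name here is invented; 'DATA.tar.gz' follows the example above):

import tarfile

# Open an outer pack, then the compressed tar stored inside it.
with tarfile.open('pack.rpz', 'r:*') as outer:
    inner_fh = outer.extractfile('DATA.tar.gz')
    with tarfile.open(fileobj=inner_fh, mode='r:*') as inner:
        print(inner.getnames())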
Example #20
 def run(self, connection, args=None):
     if not self.parameters.get(self.param_key, None):  # idempotency
         return connection
     connection = super(ExtractRootfs, self).run(connection, args)
     root = self.data['download_action'][self.param_key]['file']
     root_dir = mkdtemp(basedir=DISPATCHER_DOWNLOAD_DIR)
     if self.use_tarfile:
         try:
             tar = tarfile.open(root)
             tar.extractall(root_dir)
             tar.close()
         except tarfile.TarError as exc:
             raise JobError("Unable to unpack %s: '%s' - %s" % (self.param_key, os.path.basename(root), exc))
     elif self.use_lzma:
         with contextlib.closing(lzma.LZMAFile(root)) as xz:
             with tarfile.open(fileobj=xz) as tarball:
                 try:
                     tarball.extractall(root_dir)
                 except tarfile.TarError as exc:
                     raise JobError("Unable to unpack %s: '%s' - %s" % (self.param_key, os.path.basename(root), exc))
     else:
         raise RuntimeError("Unable to decompress %s: '%s'" % (self.param_key, os.path.basename(root)))
     self.set_common_data('file', self.file_key, root_dir)
     self.logger.debug("Extracted %s to %s" % (self.file_key, root_dir))
     return connection
Example #21
    def _do_load_plugin_cb(self, widget):
        self.tw.load_save_folder = self._get_execution_dir()
        file_path, loaddir = get_load_name('.tar.gz', self.tw.load_save_folder)
        if file_path is None:
            return
        try:
            # Copy to tmp file since some systems had trouble
            # with gunzip directly from datastore
            datapath = get_path(None, 'instance')
            if not os.path.exists(datapath):
                os.makedirs(datapath)
            tmpfile = os.path.join(datapath, 'tmpfile.tar.gz')
            subprocess.call(['cp', file_path, tmpfile])
            status = subprocess.call(['gunzip', tmpfile])
            if status == 0:
                tar_fd = tarfile.open(tmpfile[:-3], 'r')
            else:
                tar_fd = tarfile.open(tmpfile, 'r')
        except:
            tar_fd = tarfile.open(file_path, 'r')

        tmp_dir = tempfile.mkdtemp()

        try:
            tar_fd.extractall(tmp_dir)
            load_a_plugin(self, tmp_dir)
            self.restore_cursor()
        except:
            self.restore_cursor()
        finally:
            tar_fd.close()
            # Remove tmpfile.tar
            subprocess.call(['rm',
                             os.path.join(datapath, 'tmpfile.tar')])
Example #22
    def extract(self, archive_file_path, output_file_path, progress):
        output_file_path = u"\\\\?\\" + os.path.abspath(output_file_path)

        logging.info("Extracting {0}".format(self.__url))

        os.makedirs(output_file_path)
        filename, extension = os.path.splitext(self.__file_name)
        if extension == ".gz" or extension == ".tgz":
            with tarfile.open(archive_file_path, 'r:gz') as arch:
                arch.extractall(output_file_path)
        elif extension == ".bz2":
            with tarfile.open(archive_file_path, 'r:bz2') as arch:
                arch.extractall(output_file_path)
        elif extension == ".zip":
            with zipfile.ZipFile(archive_file_path) as arch:
                arch.extractall(output_file_path)
        elif extension == ".7z":
            subprocess.call(["7za", "x", archive_file_path, "-o{}".format(output_file_path)])
        else:
            logging.error("unsupported file extension {0}".format(extension))

        for i in range(self.__tree_depth):
            sub_dirs = os.listdir(output_file_path)
            if len(sub_dirs) != 1:
                raise ValueError("unexpected archive structure,"
                                 " expected exactly one directory in {}".format(output_file_path))
            source_dir = os.path.join(output_file_path, sub_dirs[0])

            for src in os.listdir(source_dir):
                shutil.move(os.path.join(source_dir, src), output_file_path)

            shutil.rmtree(source_dir)
Example #23
    def extract(self):
        # initialize the progress bar
        self.progressbar.set_fraction(0)
        self.progressbar.set_text(_('Installing'))
        self.progressbar.show()
        self.refresh_gtk()

        extracted = False
        try:
            if self.common.paths['tarball_file'][-2:] == 'xz':
                # if tarball is .tar.xz
                xz = lzma.LZMAFile(self.common.paths['tarball_file'])
                tf = tarfile.open(fileobj=xz)
                tf.extractall(self.common.paths['tbb']['dir'])
                extracted = True
            else:
                # if tarball is .tar.gz
                if tarfile.is_tarfile(self.common.paths['tarball_file']):
                    tf = tarfile.open(self.common.paths['tarball_file'])
                    tf.extractall(self.common.paths['tbb']['dir'])
                    extracted = True
        except:
            pass

        if not extracted:
            self.set_gui('task', _("Tor Browser Launcher doesn't understand the file format of {0}".format(self.common.paths['tarball_file'])), ['start_over'], False)
            self.clear_ui()
            self.build_ui()
            return

        # installation is finished, so save installed_version
        self.common.settings['installed_version'] = self.common.settings['latest_version']
        self.common.save_settings()

        self.run_task()
Example #24
 def datafiles(self):
     """ Get list of readable datafiles from asset (multiple filenames if tar or hdf file) """
     path = os.path.dirname(self.filename)
     indexfile = os.path.join(path, self.filename + '.index')
     if os.path.exists(indexfile):
         datafiles = File2List(indexfile)
         if len(datafiles) > 0:
             return datafiles
     try:
         if tarfile.is_tarfile(self.filename):
             tfile = tarfile.open(self.filename)
             datafiles = tfile.getnames()
         elif zipfile.is_zipfile(self.filename):
             zfile = zipfile.ZipFile(self.filename)
             datafiles = ['/vsizip/' + os.path.join(self.filename, f)
                          for f in zfile.namelist()]
         else:
             # Try subdatasets
             fh = gdal.Open(self.filename)
             sds = fh.GetSubDatasets()
             datafiles = [s[0] for s in sds]
         if len(datafiles) > 0:
             List2File(datafiles, indexfile)
             return datafiles
         else:
             return [self.filename]
     except Exception as e:
         raise Exception('Problem accessing asset(s) in {}\n ({})'
                         .format(self.filename, e))
Example #25
def _downloadAndExtractTarball(tarbalUrl, targetDir):
    try:
        remoteFile = urllib2.urlopen(tarbalUrl)
    except Exception as ex:
        print 'Failed contacting:', tarbalUrl, ' with error:"', ex, '" retrying...'
        remoteFile = urllib2.urlopen(tarbalUrl)

    try:
        shutil.rmtree(targetDir, ignore_errors=True)
        os.makedirs(targetDir)
    except OSError:
        pass

    localTarBall = os.path.join(targetDir, os.path.basename(tarbalUrl))
    targetFile = open(localTarBall, 'wb')
    while True:
        data = remoteFile.read()
        if not data:
            break
        targetFile.write(data)
    remoteFile.close()
    targetFile.close()

    print 'Expanding tarball:', localTarBall
    tarfile.open(localTarBall, 'r:gz').extractall(targetDir)
Example #26
def extract_tar_archive(archive_path, destination_directory):
    """
    Extracts the given tarball to the given destination directory.
    It automatically handles the following compressed archives on both Python2
    and Python3.

    - gz
    - xz
    - bz2
    - lzma

    :param archive_path: The path to the archive which should be extracted.
    :type archive_path: string

    :param destination_directory: The directory where the files should be
        extracted to. The directory does not have to exist prior to calling
        this function; it will be automatically created, if not.
    :type destination_directory: string
    """
    # lzma (.lzma and .xz) compressed archives are not automatically
    # uncompressed on python2.
    if archive_path.endswith('.xz') or archive_path.endswith('.lzma'):
        if not six.PY3:
            with contextlib.closing(lzma.LZMAFile(archive_path)) as lzma_file:
                with tarfile.open(fileobj=lzma_file) as archive_file:
                    archive_file.extractall(destination_directory)
            return
    # In all other cases, tarfile handles compression automatically
    with tarfile.open(archive_path) as archive_file:
        archive_file.extractall(destination_directory)
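As Example #26 notes, tarfile handles gz and bz2 (and, on Python 3, xz) transparently; the 'r:*' mode asks it to autodetect the compression. A small sketch under that assumption:

import tarfile

def extract_auto(archive_path, dest):
    # 'r:*' lets tarfile sniff the compression: gz, bz2, xz or none.
    with tarfile.open(archive_path, 'r:*') as tf:
        tf.extractall(dest)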
Example #27
def prepareTar(doc, visibleTar=False):
    # Finish the spec
    specOutput = tempfile.NamedTemporaryFile(delete=False)
    doc.finish(outputFilename=specOutput.name)
    # Build the TAR file
    if visibleTar:
        tar = tarfile.open(name="test.tar", mode='w')
    else:
        f = tempfile.NamedTemporaryFile(delete=False)
        tar = tarfile.open(fileobj=f, mode='w')
    tar.add(specOutput.name, arcname="Overview.html")
    additionalFiles = extensions.BSPublishAdditionalFiles(["images", "diagrams", "examples"])
    for fname in additionalFiles:
        try:
            if isinstance(fname, basestring):
                tar.add(fname)
            elif isinstance(fname, list):
                tar.add(fname[0], arcname=fname[1])
        except OSError:
            pass
    tar.close()
    specOutput.close()
    os.remove(specOutput.name)
    if visibleTar:
        return open("test.tar", "rb")
    else:
        f.seek(0)
        return f
Example #28
def download_20newsgroups(target_dir, cache_path):
    """ Download the 20Newsgroups data and convert is in a zipped pickle
        storage.
    """
    archive_path = os.path.join(target_dir, ARCHIVE_NAME)
    train_path = os.path.join(target_dir, TRAIN_FOLDER)
    test_path = os.path.join(target_dir, TEST_FOLDER)

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    if not os.path.exists(archive_path):
        logger.warn("Downloading dataset from %s (14 MB)", URL)
        opener = urllib.urlopen(URL)
        open(archive_path, 'wb').write(opener.read())

    logger.info("Decompressing %s", archive_path)
    tarfile.open(archive_path, "r:gz").extractall(path=target_dir)
    os.remove(archive_path)

    # Store a zipped pickle
    cache = dict(
            train=load_files(train_path),
            test=load_files(test_path)
        )
    open(cache_path, 'wb').write(pickle.dumps(cache).encode('zip'))
    shutil.rmtree(target_dir)
    return cache
Example #29
    def test_make_distribution_owner_group(self):
        dist, cmd = self.get_cmd()
        cmd.formats = ['gztar']
        cmd.owner = pwd.getpwuid(0)[0]
        cmd.group = grp.getgrgid(0)[0]
        cmd.ensure_finalized()
        cmd.run()
        archive_name = join(self.tmp_dir, 'dist', 'fake-1.0.tar.gz')
        archive = tarfile.open(archive_name)
        try:
            for member in archive.getmembers():
                self.assertEqual(member.uid, 0)
                self.assertEqual(member.gid, 0)

        finally:
            archive.close()

        dist, cmd = self.get_cmd()
        cmd.formats = ['gztar']
        cmd.ensure_finalized()
        cmd.run()
        archive_name = join(self.tmp_dir, 'dist', 'fake-1.0.tar.gz')
        archive = tarfile.open(archive_name)
        try:
            for member in archive.getmembers():
                self.assertEqual(member.uid, os.getuid())

        finally:
            archive.close()
Example #30
 def run_simcoal(self, par_file, num_sims, ploydi='1', parDir=None):
     if parDir is None:
         parDir = os.sep.join([self.dataDir, 'SimCoal', 'runs'])
     par_file_root = par_file[:-4]
     tar_name = os.sep.join([self.cacheDir, ploydi, par_file_root +
                             '.tar.bz2'])
     if os.access(tar_name, os.R_OK):
         tf = tarfile.open(tar_name)
         tar_num_sims = len(tf.getmembers()) - 3
     else:
         tar_num_sims = 0
     if tar_num_sims >= num_sims:
         tf.extractall(parDir)
         tf.close()
         return
     else:
         try:
             tf.close()
         except NameError:
             pass  # not opened in the first place, OK.
     scc = SimCoalController(self.simcoalDir)
     scc.run_simcoal(par_file, num_sims, ploydi, parDir)
     tf = tarfile.open(tar_name, 'w:bz2')
     tf.add(os.sep.join([parDir, par_file_root]), par_file_root)
     tf.close()
Example #31
def create_bundle(executables,
                  output,
                  tarball=False,
                  rename=[],
                  chroot=None,
                  add=[],
                  no_symlink=[],
                  shell_launchers=False,
                  detect=False):
    """Handles the creation of the full bundle."""
    # Initialize these ahead of time so they're always available for error handling.
    output_filename, output_file, root_directory = None, None, None
    try:

        # Create a temporary unpackaged bundle for the executables.
        root_directory = create_unpackaged_bundle(
            executables,
            rename=rename,
            chroot=chroot,
            add=add,
            no_symlink=no_symlink,
            shell_launchers=shell_launchers,
            detect=detect,
        )

        # Populate the filename template.
        output_filename = render_template(
            output,
            executables=('-'.join(
                os.path.basename(executable) for executable in executables)),
            extension=('tgz' if tarball else 'sh'),
        )

        # Store a gzipped tarball of the bundle in memory.
        tar_stream = io.BytesIO()
        with tarfile.open(fileobj=tar_stream, mode='w:gz') as tar:
            tar.add(root_directory, arcname='exodus')

        # Configure the appropriate output mechanism.
        if output_filename == '-':
            output_file = getattr(sys.stdout, 'buffer', sys.stdout)
        else:
            output_file = open(output_filename, 'wb')

        # Construct the installation script and write it out.
        if not tarball:
            if output_filename == '-':
                base64_encoded_tarball = base64.b64encode(
                    tar_stream.getvalue()).decode('utf-8')
                script_content = render_template_file(
                    'install-bundle-noninteractive.sh',
                    base64_encoded_tarball=base64_encoded_tarball)
                output_file.write(script_content.encode('utf-8'))
            else:
                output_file.write(
                    render_template_file('install-bundle.sh').encode('utf-8'))
                output_file.write(tar_stream.getvalue())
        else:
            # Or just write out the tarball.
            output_file.write(tar_stream.getvalue())

        # Write out the success message.
        logger.info('Successfully created "%s".' % output_filename)
        return True
    except:  # noqa: E722
        raise
    finally:
        if root_directory:
            shutil.rmtree(root_directory)
        if output_file and output_filename:
            output_file.close()
            if not tarball and output_filename not in ['-', '/dev/null']:
                st = os.stat(output_filename)
                os.chmod(output_filename, st.st_mode | stat.S_IEXEC)
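Example #31 assembles the gzipped tarball entirely in memory before deciding where (and how) to write it out. A minimal sketch of that in-memory pattern (the directory name is a placeholder):

import io
import tarfile

# Write a gzipped tar into a BytesIO buffer, then reuse the raw bytes,
# e.g. to embed them in another file or upload them somewhere.
buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode='w:gz') as tar:
    tar.add('some_directory', arcname='bundle')
payload = buf.getvalue()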
Example #32
def create_l3c_tarball(inpdir, idnumber, tempdir, l3_tarfile):
    """
    Create the final l3c tarball to be stored in ECFS.
    """
    # -- find l3c input directory via idnumber
    dirs = os.listdir(inpdir)
    if len(dirs) > 0:
        for idir in dirs:
            if idnumber in idir:
                l3cdir = os.path.join(inpdir, idir)
    else:
        logger.info("No input in {0} matching {1} ".
                format(inpdir, idnumber))
        sys.exit(0)

    # -- list of files to be tared
    tar_files = list()

    # -- list all files
    # noinspection PyUnboundLocalVariable
    files = os.listdir(l3cdir)
    for f in files:

        if f.endswith(".nc"):
            # copy file
            source = os.path.join(l3cdir, f)
            target = os.path.join(tempdir, f)
            shutil.copy2(source, target)

            # add to list
            tar_files.append(target)

        if f.endswith(".tmp"):
            fname, fext = os.path.splitext(f)
            last_folder = l3cdir.split("/")[-1]
            # noinspection PyBroadException
            try:
                pattern = re.search('(.+?)_global',
                                    last_folder).group(1) + '_l2files'
            except:
                pattern = last_folder + '_l2files'

            # copy file
            l2_tmp = pattern + fext
            source = os.path.join(l3cdir, f)
            target = os.path.join(tempdir, l2_tmp)
            shutil.copy2(source, target)

            # add to list
            tar_files.append(target)

    # -- create final tarfile to be copied into ECFS
    tar = tarfile.open(l3_tarfile, "w:")
    for tfile in tar_files:
        filenam = os.path.basename(tfile)
        tar.add(tfile, arcname=filenam)
    tar.close()

    # -- delete input files in temp
    for tfile in tar_files:
        delete_file(tfile)

    return [l3_tarfile]
Example #33
def create_l3u_tarball(inpdir, idnumber, tempdir, l3_tarfile, local, sensor):
    """
    Create the final l3u tarball to be stored in ECFS.
    """
    # -- find l3 input directory via idnumber
    if not local:
        split = "." 
        foo = l3_tarfile.split(split)
        foo[-1] = "part1.tar"
        l3_tarfile1 = split.join(foo)
        foo[-1] = "part2.tar"
        l3_tarfile2 = split.join(foo)
        
    dirs = os.listdir(inpdir)
    daily_list = list()
    if len(dirs) > 0:
        for idir in dirs:
            if idnumber in idir and "splitting_tasklist" not in idir:
                nfiles_nc = len(fnmatch.filter(os.listdir(os.path.join(inpdir, idir)), '*.nc'))
                if nfiles_nc == 4:
                    daily_list.append(os.path.join(inpdir, idir))
    else:
        logger.info("No input in {0} matching {1} ".
                format(inpdir, idnumber))
        sys.exit(0)

    # -- final files to be tared
    tar_files = list()

    # -- sort daily list
    daily_list.sort()

    # -- make daily tarballs
    for daily in daily_list:

        # define daily tempdir
        daily_tempdir = os.path.join(tempdir,
                                     daily.split("/")[-1])
        create_dir(daily_tempdir)

        # list of files to be tared
        daily_tar_files = list()

        # list all files
        files = os.listdir(daily)
        for f in files:

            if f.endswith(".nc"):
                # copy file
                source = os.path.join(daily, f)
                if local:
                    index = f.find("-fv")
                    f = f[:index] + "_Europe" + f[index:]
                target = os.path.join(daily_tempdir, f)
                ncbase = os.path.splitext(f)[0]
                shutil.copy2(source, target)

                # add to list
                daily_tar_files.append(target)

            if f.endswith(".tmp"):
                fname, fext = os.path.splitext(f)
                last_folder = daily_tempdir.split("/")[-1]
                # noinspection PyBroadException
                try:
                    pattern = re.search('(.+?)_global',
                                        last_folder).group(1) + '_l2files'
                except:
                    pattern = last_folder + '_l2files'

                # copy file
                l2_tmp = pattern + fext
                source = os.path.join(daily, f)
                target = os.path.join(daily_tempdir, l2_tmp)
                shutil.copy2(source, target)

                # add to list
                daily_tar_files.append(target)

        # create daily tarfile
        ncbase = "-".join([i for i in ncbase.split("-") if "CLD_" not in i and "RAD_" not in i])
        daily_l3_tarfile = os.path.join(tempdir, ncbase + ".tar")
        tar = tarfile.open(daily_l3_tarfile, "w:")
        for tfile in daily_tar_files:
            filenam = os.path.basename(tfile)
            tar.add(tfile, arcname=filenam)
        tar.close()

        # collect daily tarfiles for final tarball
        tar_files.append(daily_l3_tarfile)

        # delete daily_tempdir
        delete_dir(daily_tempdir)

    # -- make monthly tarballs containing all daily tarballs
    if not local:
        tar1 = tarfile.open(l3_tarfile1, "w:")
        tar2 = tarfile.open(l3_tarfile2, "w:")
    else:
        tar = tarfile.open(l3_tarfile, "w:")
    half_tar_files = len(tar_files) / 2
    file_index = 0
    for tfile in tar_files:
        file_index = file_index + 1
        filenam = os.path.basename(tfile)
        if not local: 
            if file_index <= half_tar_files:
                tar1.add(tfile, arcname=filenam)
            else:
                tar2.add(tfile, arcname=filenam)
        else:
            # filedir = os.path.dirname(tfile)
            tar.add(tfile, arcname=filenam)
            
    if not local:
        tar1.close()
        tar2.close()
        return [l3_tarfile1, l3_tarfile2]
    else:
        tar.close()
        return [l3_tarfile]

    # -- delete daily tarballs (note: unreachable, both branches above already return)
    for tfile in tar_files:
        delete_file(tfile)
Example #34
def create_l2_tarball(inpdir, idnumber, tempdir, l2_tarfile):
    """
    Create the final l2 tarball to be stored in ECFS.
    """
    # -- find l2 input directory via idnumber
    dirs = os.listdir(inpdir)
    daily_list = list()
    if len(dirs) > 0:
        for idir in dirs:
            if idnumber in idir:
                daily_list.append(os.path.join(inpdir, idir))
    else:
        logger.info("No input in {0} matching {1} ".
                format(inpdir, idnumber))
        sys.exit(0)

    # -- final files to be stored in ECFS (daily .tar.gz)
    tar_file_list = list()

    # -- sort daily list
    daily_list.sort()

    # -- make daily tarballs
    for daily in daily_list:

        # date and subdir of daily
        idate_folder = daily.split("/")[-1]
        idate = idate_folder.split("_")[0]

        # list all orbitfiles
        filver = get_file_version()
        suffix = "fv"+filver+".nc"
        files = get_file_list_via_filext(daily, suffix)

        # create daily tarfilename
        ncfile = files.pop()
        ncbase = os.path.splitext(os.path.basename(ncfile))[0]
        nclist = ncbase.split("-")[1:]
        ncstr = "-".join(nclist)
        tarbas = idate + '-' + ncstr
        daily_l2_tarfile = os.path.join(tempdir, tarbas + ".tar")

        # create daily tarfile containing all orbits
        # print (" * Create \'%s\'" % daily_l2_tarfile)
        tar = tarfile.open(daily_l2_tarfile, "w:")
        for tfile in files:
            # filedir = os.path.dirname(tfile)
            filenam = os.path.basename(tfile)
            tar.add(tfile, arcname=filenam)
        tar.close()

        # collect daily tarfiles for final tarball
        tar_file_list.append(daily_l2_tarfile)

    # --------------------------------------------------------
    # NOT POSSIBLE -> larger than 32 GB (limit)
    # --------------------------------------------------------
    # -- make monthly tarballs containing all daily tarballs
    # tar = tarfile.open( l2_tarfile, "w:gz" )
    # for tfile in tar_file_list:
    #    filedir = os.path.dirname(tfile)
    #    filenam = os.path.basename(tfile)
    #    tar.add( tfile, arcname=filenam )
    # tar.close()
    #
    # -- delete daily tarballs
    # for tfile in tar_file_list:
    #    delete_file( tfile )
    # --------------------------------------------------------

    return tar_file_list
Example #35
        urlretrieve("http://biosemantics.org/PatentCorpus/Patent_Corpus.rar",
                    "Patent_Corpus.rar")
        with rarfile.RarFile("Patent_Corpus.rar") as f:
            f.extractall()
            shutil.move(os.path.join("Patent_Corpus/Full_set"), os.path.join("data", "biosemantics"))
            os.remove("Patent_Corpus.rar")
            shutil.rmtree("Patent_Corpus")

    # CellFinder
    if is_empty('cellfinder'):
        urlretrieve(
            "https://www.informatik.hu-berlin.de/de/forschung/gebiete/wbi/resources/cellfinder/cellfinder1_brat.tar.gz",
            "cellfinder.tar.gz")
        path = os.path.join(data_dir, 'cellfinder')
        os.makedirs(path)
        with tarfile.open("cellfinder.tar.gz") as f:
            f.extractall(path)
        os.remove("cellfinder.tar.gz")

    # CHEMDNER Patents
    if is_missing(['cemp_train.txt', 'cemp_train.ann',
                   'cemp_val.txt', 'cemp_val.ann']):
        urlretrieve('http://www.biocreative.org/media/store/files/2015/cemp_training_set.tar.gz',
                    'cemp_train.tar.gz')
        urlretrieve('http://www.biocreative.org/media/store/files/2015/cemp_development_set_v03.tar.gz',
                    'cemp_val.tar.gz')

        with tarfile.open('cemp_train.tar.gz') as f:
            f.extractall()
        os.remove('cemp_train.tar.gz')
Example #36
def compress_dicoms(dicom_list, out_prefix, tempdirs, overwrite):
    """Archives DICOMs into a tarball

    Also tries to do it reproducibly, so takes the date for files
    and target tarball based on the series time (within the first file)

    Parameters
    ----------
    dicom_list : list of str
      list of dicom files
    out_prefix : str
      output path prefix, including the portion of the output file name
      before .dicom.tgz suffix
    tempdirs : object
      TempDirs object to handle multiple tmpdirs
    overwrite : bool
      Overwrite existing tarfiles

    Returns
    -------
    filename : str
      Result tarball
    """

    tmpdir = tempdirs(prefix='dicomtar')
    outtar = out_prefix + '.dicom.tgz'

    if op.exists(outtar) and not overwrite:
        lgr.info("File {} already exists, will not overwrite".format(outtar))
        return
    # tarfile encodes current time.time inside making those non-reproducible
    # so we should choose which date to use.
    # Solution from DataLad although ugly enough:

    dicom_list = sorted(dicom_list)
    dcm_time = get_dicom_series_time(dicom_list)

    def _assign_dicom_time(ti):
        # Reset the date to match the one of the last commit, not from the
        # filesystem since git doesn't track those at all
        ti.mtime = dcm_time
        return ti

    # poor man mocking since can't rely on having mock
    try:
        import time
        _old_time = time.time
        time.time = lambda: dcm_time
        if op.lexists(outtar):
            os.unlink(outtar)
        with tarfile.open(outtar, 'w:gz', dereference=True) as tar:
            for filename in dicom_list:
                outfile = op.join(tmpdir, op.basename(filename))
                if not op.islink(outfile):
                    os.symlink(op.realpath(filename), outfile)
                # place into archive stripping any lead directories and
                # adding the one corresponding to prefix
                tar.add(outfile,
                        arcname=op.join(op.basename(out_prefix),
                                        op.basename(outfile)),
                        recursive=False,
                        filter=_assign_dicom_time)
    finally:
        time.time = _old_time
        tempdirs.rmtree(tmpdir)

    return outtar
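Example #36 makes the archive reproducible by pinning every member's mtime through the filter= argument of TarFile.add. A stripped-down sketch of just that normalization (the helper name is my own):

import tarfile

def make_reproducible_tar(paths, out_tar, fixed_mtime=0):
    # Give every member the same mtime so repeated runs produce
    # identical member headers.
    def _set_mtime(tarinfo):
        tarinfo.mtime = fixed_mtime
        return tarinfo

    with tarfile.open(out_tar, 'w') as tar:
        for p in paths:
            tar.add(p, filter=_set_mtime)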
Example #37
def create_backup_from_onprem(ctx, config_file, profile, **kwargs):

    if os.name == 'nt':
        exit("This script is not supported on Windows operating systems.")

    if profile == Sentinel(DEFAULT_PROFILE):
        profile = DEFAULT_PROFILE

    initial_dict = {
        'config_file': config_file,
        'default_values_from_file': {},
        'profile': profile,
        'cert_bundle': None,
        'endpoint': None,
        'request_id': None,
        'no_retry': None,
        'debug': False,
        'proxy': None,
        'settings': {'proxy': None},
        'parameter_aliases': {},
        'region': None,
    }

    ctx.obj = initial_dict
    cli_root.load_default_values(ctx, config_file, profile)
    cli_util.populate_dict_key_with_default_value(ctx, 'region', click.STRING)

    availability_domain = kwargs['availability_domain']
    opcinstallerdir = kwargs['opc_installer_dir']
    rmanchannels = int(kwargs['rman_channels'])
    if 'rman_password' in kwargs:
        rmanpassword = kwargs['rman_password']
    if 'additional_opc_args' in kwargs:
        additionalopcargs = kwargs['additional_opc_args']

    # default tmpDir if it is not supplied
    tmpdir = os.path.join(tempfile.gettempdir(), "onprem_upload")
    if 'tmp_dir' in kwargs and kwargs['tmp_dir']:
        tmpdir = os.path.abspath(kwargs['tmp_dir'])

    # Make as many checks as possible before taking a backup.

    # Make sure opcInstaller exists and is readable
    opcinstaller = os.path.join(opcinstallerdir, "opc_install.jar")
    if not os.path.isfile(opcinstaller) and os.access(opcinstaller, os.R_OK):
        sys.exit("Could not access " + opcinstaller)

    # create tmpDir if it doesn't exist
    os.makedirs(tmpdir, 0o700)
    # make sure it is a directory and is empty
    if not os.path.isdir(tmpdir):
        sys.exit(tmpdir + " is not a directory")
    if os.listdir(tmpdir):
        sys.exit(tmpdir + " is not empty")

    # Verify ORACLE_HOME and ORACLE_SID are set and rman is good
    if 'ORACLE_HOME' not in os.environ or 'ORACLE_SID' not in os.environ:
        sys.exit("ORACLE_HOME and ORACLE_SID should be set")

    rman = os.path.join(os.environ['ORACLE_HOME'], "bin", "rman")
    if not (os.path.isfile(rman) and os.access(rman, os.X_OK)):
        sys.exit("Could not find a usable rman in this environment")

    click.echo("Connecting to Oracle database")
    db = cx_Oracle.connect("/", mode=cx_Oracle.SYSDBA)

    cursor = db.cursor()

    cursor.execute('select version from v$instance')
    for row in cursor:
        dbversion = row[0]
    if '18.' in dbversion:
        cursor.execute('select version_full from v$instance')
        for row in cursor:
            dbversion = row[0]
    click.echo("Oracle version is:%s" % dbversion)

    omf = None
    cursor.execute("select value from v$parameter where name ='db_create_file_dest'")
    for row in cursor:
        omf = row[0]
    if omf is None:
        sys.exit("OMF is required for the script to work.")

    click.echo("Checking the archive log mode of the database")
    cursor.execute('select log_mode from v$database')
    for row in cursor:
        if row[0] != "ARCHIVELOG":
            sys.exit("Database should be in archivelog mode")

    # Make sure that the database instance is open. It is possible to backup the database even if it is
    # mounted. But, it will have to be consistently dismounted. The script doesn't handle that case now
    click.echo("Checking if the database is open")
    cursor.execute('select status from v$instance')
    for row in cursor:
        if row[0] != 'OPEN':
            sys.exit("Database is not open")

    # Make sure spfile is in play
    cursor.execute("select count(*) from v$parameter where name = 'spfile' and value is not null")
    for row in cursor:
        if row[0] != 1:
            sys.exit("script requires the instance to be started with spfile")

    dbId = None
    dbName = None
    dbUniqueName = None
    # Get the name of the database
    click.echo("Getting database name and database unique name")
    cursor.execute('select dbid, name, db_unique_name from v$database')
    for row in cursor:
        dbId = row[0]
        dbName = row[1]
        dbUniqueName = row[2]
        click.echo("Database Id:%d Name:%s UniqueName:%s" % (dbId, dbName, dbUniqueName))

    charSet = None
    click.echo("Fetching character set")
    cursor.execute("select value$ from sys.props$ where name='NLS_CHARACTERSET'")
    for row in cursor:
        charSet = row[0]
        click.echo("Character Set:%s" % charSet)

    nCharSet = None
    click.echo("Fetching national character set")
    cursor.execute("select value$ from sys.props$ where name='NLS_NCHAR_CHARACTERSET'")
    for row in cursor:
        nCharSet = row[0]
        click.echo("National Character Set:%s" % nCharSet)

    racMode = 'FALSE'
    click.echo("Fetching rac mode")
    cursor.execute("select value from v$parameter where name='cluster_database'")
    for row in cursor:
        racMode = row[0]
        click.echo("Rac mode:%s" % racMode)

    pdbname = None
    cdbmode = None
    tdeenabled = False
    walletLoc = None

    # check if tde is enabled
    cursor.execute('select BITAND(flags, 8) from x$kcbdbk')
    for row in cursor:
        if row[0] == 8:
            tdeenabled = True

    if not tdeenabled:
        if not rmanpassword:
            sys.exit("TDE is not active in the instance. RMAN password is required")
    else:
        # TDE Enabled
        if rmanpassword:
            sys.exit("TDE is active in the instance. RMAN password should not be specified")
        cursor.execute('select upper(wrl_type), wrl_parameter, upper(status) from v$encryption_wallet where rownum < 2')
        for row in cursor:
            if (row[0] != 'FILE') or (row[2] != 'OPEN'):
                sys.exit("This operation requires the wallet to be in file and for it to be open")
            if row[1] is not None:
                walletLoc = os.path.expandvars(row[1])
            if row[1] is None or not os.path.isdir(walletLoc):
                sys.exit("Could not query the wallet file.\nPlease ensure that any environment variables referenced in sqlnet.ora file are set")

        versions = dbversion.split(".")
        if int(versions[0]) >= 12:
            cursor.execute('select upper(wallet_type) from v$encryption_wallet')
            for row in cursor:
                if row[0] != 'AUTOLOGIN' and row[0] != 'UNKNOWN':
                    sys.exit("Unsupported wallet type:" + row[0])

            cursor.execute('select cdb from v$database')
            for row in cursor:
                cdbmode = row[0]

            if cdbmode == "YES":
                pdbs = []
                cursor.execute("select name from v$containers where name <> 'CDB$ROOT' and open_mode like 'READ%'")
                for row in cursor:
                    pdbs.append(row[0])
                for pdb in pdbs:
                    cursor.execute('alter session set container = "' + pdb + '"')
                    cursor.execute(
                        'select upper(wrl_type), upper(status), upper(wallet_type), wrl_parameter from v$encryption_wallet')
                    for row in cursor:
                        if row[0] != 'FILE' or row[1] != 'OPEN' or (row[2] != 'AUTOLOGIN' and row[2] != 'UNKNOWN'):
                            sys.exit("PDB:" + pdb + " cannot be backed up")

    # Get the datasize and redosize
    dataSize = 0
    redoSize = 0
    cursor.execute('select sum(bytes)/1024/1024 from ( select sum(bytes) bytes from v$datafile union select sum(bytes) bytes from v$tempfile)')
    for row in cursor:
        dataSize = math.ceil(row[0])
    cursor.execute("select sum(bytes)/1024/1024 from (select sum(bytes*members) bytes from v$log where group# in " +  # noqa: W504
                   "(select group# from v$logfile where type='ONLINE') union select (BLOCK_SIZE*FILE_SIZE_BLKS) bytes from v$controlfile)")
    for row in cursor:
        redoSize = math.ceil(row[0])
    if dataSize == 0 or redoSize == 0:
        sys.exit("Failed to determine data size and/or redo size")

    # dump all initialization parameters. The most likely reason a restore will fail is
    # references to the local system in the initialization parameters
    cursor.execute("select name, value from v$parameter where isdefault = 'FALSE'")
    with open(os.path.join(tmpdir, "parameter.log"), 'w') as pf:
        for row in cursor:
            pf.write(row[0] + ":\t\t" + (row[1] if row[1] else "<None>"))
            pf.write("\n")

    cursor.close()
    db.close()

    # run rman once to make sure that it can be invoked successfully
    p = Popen([rman, "target", "/"], stdin=PIPE, stdout=PIPE, stderr=PIPE)
    out, err = p.communicate(b'show all;')
    if err or not out or (p.returncode != 0):
        sys.exit("Error while test running rman")
    out = out.splitlines()
    if out[-1] != b'Recovery Manager complete.':
        sys.exit("Failed to successfully execute rman")

    client = cli_util.build_client('database', ctx)

    # Create Backup resource
    details = {}
    details['availabilityDomain'] = availability_domain
    if charSet:
        details['characterSet'] = charSet
    if nCharSet:
        details['ncharacterSet'] = nCharSet
    if 'edition' in kwargs and kwargs['edition']:
        details['databaseEdition'] = kwargs['edition']
    if dbName:
        details['dbName'] = dbName
    if 'display_name' in kwargs and kwargs['display_name']:
        details['displayName'] = kwargs['display_name']
    if dbUniqueName:
        details['dbUniqueName'] = dbUniqueName
    if dbId:
        details['externalDatabaseIdentifier'] = dbId
    details['databaseMode'] = 'SI' if racMode == 'FALSE' else 'RAC'
    details['dbVersion'] = dbversion
    details['pdbName'] = pdbname
    details['compartmentId'] = kwargs['compartment_id']

    click.echo("Creating external backup job resource...")
    backupId = None
    try:
        kwargs = {}
        result = client.create_external_backup_job(
            create_external_backup_job_details=details,
            **kwargs
        )
        backupId = result.data.backup_id
        click.echo("Created external backup job resource with id: " + backupId)

        # Check until the backup is ready
        while True:
            kwargs = {}
            result = client.get_external_backup_job(
                backup_id=backupId,
                **kwargs
            )

            if backupId != result.data.backup_id:
                sys.exit("Internal error, backupId mismatch. Please contact Oracle support")
            if result.data.provisioning:
                time.sleep(10)
                click.echo("Creating external backup job resource...")
                continue

            swiftPath = result.data.swift_path
            bucketName = result.data.bucket_name
            rmanTag = result.data.tag
            userName = result.data.user_name
            passWord = result.data.swift_password
            if swiftPath is None or \
                    bucketName is None or \
                    rmanTag is None or \
                    userName is None or \
                    passWord is None:
                sys.exit("Backup no longer exists")
            break

        # wait for object store credential to be available
        click.echo("Waiting for completion of external backup job...")
        time.sleep(30)

        if tdeenabled:
            # push the wallet to object store
            tdeWalletFile = os.path.join(tmpdir, 'tdeWallet.tar.gz')
            tdeWalletPath = swiftPath + '/' + bucketName + '/tdeWallet.tar.gz'
            click.echo("Compressing the wallet")
            with tarfile.open(tdeWalletFile, 'w:gz') as tar:
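                # arcname stores the wallet under its base name only, so the archive
                # carries no local absolute paths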
                tar.add(walletLoc, arcname=os.path.basename(walletLoc))
            click.echo("Uploading wallet")
            with open(tdeWalletFile, 'rb') as payload:
                response = requests.put(tdeWalletPath,
                                        auth=HTTPBasicAuth(userName, passWord),
                                        headers={'Content-Length': str(os.path.getsize(tdeWalletFile))},
                                        data=payload)
                response.raise_for_status()
        else:
            tdeWalletPath = None

        # push the parameter logs
        click.echo("Uploading parameter logs")
        with open(os.path.join(tmpdir, "parameter.log"), 'rb') as pf:
            response = requests.put(swiftPath + "/" + bucketName + "/" + "parameter.log",
                                    auth=HTTPBasicAuth(userName, passWord),
                                    headers={'Content-Length': str(os.path.getsize(os.path.join(tmpdir, "parameter.log")))},
                                    data=pf)
            response.raise_for_status()

        # Run opcInstaller
        cmd = "java -jar " + opcinstaller + " -host " + swiftPath + " -opcId '" + userName + "' -opcPass '" + passWord + \
              "' -walletDir " + tmpdir + " -libDir " + tmpdir + " -configFile " + \
              os.path.join(tmpdir, "opc" + os.environ['ORACLE_SID'] + ".ora") + " -container " + bucketName

        if additionalopcargs:
            cmd = cmd + " " + additionalopcargs

        click.echo("Setting up opc installer")

        cmd_redacted = "java -jar " + opcinstaller + " -host " + swiftPath + " -opcId '" + userName + "' -opcPass " + \
            "<redacted_password>" + " -walletDir " + tmpdir + " -libDir " + tmpdir + " -configFile " + \
            os.path.join(tmpdir, "opc" + os.environ['ORACLE_SID'] + ".ora") + " -container " + bucketName

        click.echo("Executing command: %s" % cmd_redacted)
        p = Popen(cmd, shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        out, err = p.communicate()
        if err or (p.returncode != 0):
            print(out)
            print(err)
            sys.exit("Failed to run opcInstaller cmd:" + cmd)

        # Make sure that config file, wallet and the library exists
        libfile = "libopc.so" if os.name != 'nt' else "libopc.dll"
        if not os.path.exists(os.path.join(tmpdir, "opc" + os.environ['ORACLE_SID'] + ".ora")) or \
                not os.path.exists(os.path.join(tmpdir, "cwallet.sso")) or \
                not os.path.exists(os.path.join(tmpdir, libfile)):
            sys.exit("Unexpected status from opcInstaller. Please contact Oracle support")

        # Create RMAN script
        script = open(os.path.join(tmpdir, "rman.sql"), "w")
        script.write("set echo on\n")
        if not tdeenabled:
            script.write("set encryption on identified by '" + rmanpassword + "' only;\n")
        else:
            script.write("set encryption on;\n")

        script.write("run {\n")
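        # each SBT channel loads the libopc media-management library together with the
        # generated opc<ORACLE_SID>.ora configuration, so RMAN writes straight to object storage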
        for channel in range(rmanchannels):
            script.write("allocate channel odbms" + str(channel) + " type sbt " +            # noqa: W504
                         "PARMS='SBT_LIBRARY=" + tmpdir + os.path.sep + libfile + "," +      # noqa: W504
                         "SBT_PARMS=(OPC_PFILE=" + tmpdir + os.path.sep + "opc" + os.environ['ORACLE_SID'] + ".ora)';\n")
        script.write("backup as compressed backupset database tag '" + rmanTag + "' " +      # noqa: W504
                     "format '" + rmanTag + "__%d_%I_%U_%T_%t' " +                           # noqa: W504
                     "keep until time 'sysdate+29000' restore point '" + rmanTag + "';\n" +  # noqa: W504
                     "}\n")
        script.close()

        # Execute RMAN
        click.echo("Executing RMAN. It will take a few minutes to complete...")
        p = Popen([rman, "target", "/", "log", os.path.join(tmpdir, "rman.log"), "@" + os.path.join(tmpdir, "rman.sql")], stdin=PIPE,
                  stdout=PIPE, stderr=PIPE)
        out, err = p.communicate()
        if err or (not out) or (p.returncode != 0):
            print(out)
            print(err)
            sys.exit("Error while running rman commands")

        with open(os.path.join(tmpdir, "rman.log"), 'rb') as rl:
            response = requests.put(swiftPath + "/" + bucketName + "/" + "rman.log",
                                    auth=HTTPBasicAuth(userName, passWord),
                                    headers={'Content-Length': str(os.path.getsize(os.path.join(tmpdir, "rman.log")))},
                                    data=rl)
            response.raise_for_status()

        # fetch the spfile and controlfile handles
        spfHandle = None
        cfHandle = None
        with open(os.path.join(tmpdir, "rman.log")) as f:
            lines = f.readlines()
            lines = [x.strip() for x in lines]
            i = 0
            while i < len(lines):
                if lines[i] == "including current SPFILE in backup set":
                    i = i + 3
                    if i < len(lines):
                        m = re.search('^piece handle=(.+?) tag=', lines[i])
                        if m:
                            spfHandle = m.group(1)
                elif lines[i] == "including current control file in backup set":
                    i = i + 3
                    if i < len(lines):
                        m = re.search('^piece handle=(.+?) tag=', lines[i])
                        if m:
                            cfHandle = m.group(1)
                i = i + 1

        if spfHandle is None or cfHandle is None:
            sys.exit("Could not find spfile/controlfile Handle")

        click.echo("Completing the external backup job...")
        details = {}
        details['cfBackupHandle'] = cfHandle
        details['dataSize'] = dataSize
        details['redoSize'] = redoSize
        details['spfBackupHandle'] = spfHandle
        details['sqlPatches'] = []
        details['tdeWalletPath'] = tdeWalletPath
        result = client.complete_external_backup_job(
            backup_id=backupId,
            complete_external_backup_job_details=details,
            **kwargs
        )
        # print(details)
        click.echo("Response:%s" % result.status)
        click.echo("External Backup created.")
        backupId = None

    finally:
        if backupId is not None:
            click.echo("Deleting incomplete backup")
            kwargs = {}
            client.delete_backup(
                backup_id=backupId,
                **kwargs
            )
        shutil.rmtree(tmpdir)
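
# --- Illustrative sketch (not part of the command above): the wallet pack-and-upload
# --- pattern shown in isolation. The function name, paths, credentials and the
# --- object-storage URL are assumptions for demonstration only.
import os
import tarfile

import requests
from requests.auth import HTTPBasicAuth


def pack_and_upload(src_dir, archive_path, object_url, user, password):
    """Compress src_dir into a .tar.gz archive and PUT it to an object-storage URL."""
    with tarfile.open(archive_path, 'w:gz') as tar:
        # arcname keeps only the directory's base name inside the archive
        tar.add(src_dir, arcname=os.path.basename(src_dir))
    with open(archive_path, 'rb') as payload:
        response = requests.put(object_url,
                                auth=HTTPBasicAuth(user, password),
                                headers={'Content-Length': str(os.path.getsize(archive_path))},
                                data=payload)
    response.raise_for_status()
    return response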
Beispiel #38
0
    def init(self, force=False):
        idxs = [self.index_stem, self.doc_store]
        self._init_indices_parallel(idxs, self._init_iter_collection(), force)
        if not self.config['init_skip_doctttttquery']:
            self._init_indices_parallel([self.index_doctttttquery_stem], self._init_doctttttquery_iter(), force)

        base_path = util.path_dataset(self)

        needs_queries = []
        if force or not os.path.exists(os.path.join(base_path, 'train.queries.tsv')):
            needs_queries.append(lambda it: plaintext.write_tsv(os.path.join(base_path, 'train.queries.tsv'), ((qid, txt) for file, qid, txt in it if file == 'queries.train.tsv' and qid not in MINI_DEV)))
        if force or not os.path.exists(os.path.join(base_path, 'minidev.queries.tsv')):
            needs_queries.append(lambda it: plaintext.write_tsv(os.path.join(base_path, 'minidev.queries.tsv'), ((qid, txt) for file, qid, txt in it if file == 'queries.train.tsv' and qid in MINI_DEV)))
        if force or not os.path.exists(os.path.join(base_path, 'dev.queries.tsv')):
            needs_queries.append(lambda it: plaintext.write_tsv(os.path.join(base_path, 'dev.queries.tsv'), ((qid, txt) for file, qid, txt in it if file == 'queries.dev.tsv')))
        if force or not os.path.exists(os.path.join(base_path, 'eval.queries.tsv')):
            needs_queries.append(lambda it: plaintext.write_tsv(os.path.join(base_path, 'eval.queries.tsv'), ((qid, txt) for file, qid, txt in it if file == 'queries.eval.tsv')))

        if needs_queries and self._confirm_dua():
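            # download the queries tarball once, then tee the (file, qid, text) rows to
            # every pending writer, one writer thread per missing output file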
            with util.download_tmp(_SOURCES['queries'], expected_md5=_HASHES['queries']) as f, \
                 tarfile.open(fileobj=f) as tarf, \
                 contextlib.ExitStack() as ctxt:
                def _extr_subf(subf):
                    for qid, txt in plaintext.read_tsv(io.TextIOWrapper(tarf.extractfile(subf))):
                        yield subf, qid, txt
                query_iter = [_extr_subf('queries.train.tsv'), _extr_subf('queries.dev.tsv'), _extr_subf('queries.eval.tsv')]
                query_iter = tqdm(itertools.chain(*query_iter), desc='queries')
                query_iters = util.blocking_tee(query_iter, len(needs_queries))
                for fn, it in zip(needs_queries, query_iters):
                    ctxt.enter_context(util.CtxtThread(functools.partial(fn, it)))

        file = os.path.join(base_path, 'train.qrels')
        if (force or not os.path.exists(file)) and self._confirm_dua():
            stream = util.download_stream(_SOURCES['train-qrels'], 'utf8', expected_md5=_HASHES['train-qrels'])
            with util.finialized_file(file, 'wt') as out:
                for qid, _, did, score in plaintext.read_tsv(stream):
                    if qid not in MINI_DEV:
                        trec.write_qrels(out, [(qid, did, score)])

        file = os.path.join(base_path, 'minidev.qrels')
        if (force or not os.path.exists(file)) and self._confirm_dua():
            with util.finialized_file(file, 'wt') as out:
                for qid, did, score in trec.read_qrels(os.path.join(base_path, 'train.qrels')):
                    if qid in MINI_DEV:
                        trec.write_qrels(out, [(qid, did, score)])

        file = os.path.join(base_path, 'dev.qrels')
        if (force or not os.path.exists(file)) and self._confirm_dua():
            stream = util.download_stream(_SOURCES['dev-qrels'], 'utf8', expected_md5=_HASHES['dev-qrels'])
            with util.finialized_file(file, 'wt') as out:
                for qid, _, did, score in plaintext.read_tsv(stream):
                    trec.write_qrels(out, [(qid, did, score)])

        file = os.path.join(base_path, 'train.mspairs.gz')
        if not os.path.exists(file) and os.path.exists(os.path.join(base_path, 'qidpidtriples.train.full')):
            # legacy
            os.rename(os.path.join(base_path, 'qidpidtriples.train.full'), file)
        if (force or not os.path.exists(file)) and self._confirm_dua():
            util.download(_SOURCES['qidpidtriples.train.full'], file, expected_md5=_HASHES['qidpidtriples.train.full'])

        if not self.config['init_skip_msrun']:
            for file_name, subf in [('dev.msrun', 'top1000.dev'), ('eval.msrun', 'top1000.eval'), ('train.msrun', 'top1000.train.txt')]:
                file = os.path.join(base_path, file_name)
                if (force or not os.path.exists(file)) and self._confirm_dua():
                    run = {}
                    with util.download_tmp(_SOURCES[file_name], expected_md5=_HASHES[file_name]) as f, \
                         tarfile.open(fileobj=f) as tarf:
                        for qid, did, _, _ in tqdm(plaintext.read_tsv(io.TextIOWrapper(tarf.extractfile(subf)))):
                            if qid not in run:
                                run[qid] = {}
                            run[qid][did] = 0.
                    if file_name == 'train.msrun':
                        minidev = {qid: dids for qid, dids in run.items() if qid in MINI_DEV}
                        with self.logger.duration('writing minidev.msrun'):
                            trec.write_run_dict(os.path.join(base_path, 'minidev.msrun'), minidev)
                        run = {qid: dids for qid, dids in run.items() if qid not in MINI_DEV}
                    with self.logger.duration(f'writing {file_name}'):
                        trec.write_run_dict(file, run)

        query_path = os.path.join(base_path, 'trec2019.queries.tsv')
        if (force or not os.path.exists(query_path)) and self._confirm_dua():
            stream = util.download_stream(_SOURCES['trec2019.queries'], 'utf8', expected_md5=_HASHES['trec2019.queries'])
            plaintext.write_tsv(query_path, plaintext.read_tsv(stream))
        msrun_path = os.path.join(base_path, 'trec2019.msrun')
        if (force or not os.path.exists(msrun_path)) and self._confirm_dua():
            run = {}
            with util.download_stream(_SOURCES['trec2019.msrun'], 'utf8', expected_md5=_HASHES['trec2019.msrun']) as stream:
                for qid, did, _, _ in plaintext.read_tsv(stream):
                    if qid not in run:
                        run[qid] = {}
                    run[qid][did] = 0.
            with util.finialized_file(msrun_path, 'wt') as f:
                trec.write_run_dict(f, run)

        qrels_path = os.path.join(base_path, 'trec2019.qrels')
        if not os.path.exists(qrels_path) and self._confirm_dua():
            util.download(_SOURCES['trec2019.qrels'], qrels_path, expected_md5=_HASHES['trec2019.qrels'])
        qrels_path = os.path.join(base_path, 'judgedtrec2019.qrels')
        if not os.path.exists(qrels_path):
            os.symlink('trec2019.qrels', qrels_path)
        query_path = os.path.join(base_path, 'judgedtrec2019.queries.tsv')
        judged_qids = util.Lazy(lambda: trec.read_qrels_dict(qrels_path).keys())
        if (force or not os.path.exists(query_path)):
            with util.finialized_file(query_path, 'wt') as f:
                for qid, qtext in plaintext.read_tsv(os.path.join(base_path, 'trec2019.queries.tsv')):
                    if qid in judged_qids():
                        plaintext.write_tsv(f, [(qid, qtext)])
        msrun_path = os.path.join(base_path, 'judgedtrec2019.msrun')
        if (force or not os.path.exists(msrun_path)) and self._confirm_dua():
            with util.finialized_file(msrun_path, 'wt') as f:
                for qid, dids in trec.read_run_dict(os.path.join(base_path, 'trec2019.msrun')).items():
                    if qid in judged_qids():
                        trec.write_run_dict(f, {qid: dids})

        # A subset of dev that only contains queries that have relevance judgments
        judgeddev_path = os.path.join(base_path, 'judgeddev')
        judged_qids = util.Lazy(lambda: trec.read_qrels_dict(os.path.join(base_path, 'dev.qrels')).keys())
        if not os.path.exists(f'{judgeddev_path}.qrels'):
            os.symlink('dev.qrels', f'{judgeddev_path}.qrels')
        if not os.path.exists(f'{judgeddev_path}.queries.tsv'):
            with util.finialized_file(f'{judgeddev_path}.queries.tsv', 'wt') as f:
                for qid, qtext in plaintext.read_tsv(os.path.join(base_path, 'dev.queries.tsv')):
                    if qid in judged_qids():
                        plaintext.write_tsv(f, [(qid, qtext)])
        if not self.config['init_skip_msrun']:
            if not os.path.exists(f'{judgeddev_path}.msrun'):
                with util.finialized_file(f'{judgeddev_path}.msrun', 'wt') as f:
                    for qid, dids in trec.read_run_dict(os.path.join(base_path, 'dev.msrun')).items():
                        if qid in judged_qids():
                            trec.write_run_dict(f, {qid: dids})

        if not self.config['init_skip_train10']:
            file = os.path.join(base_path, 'train10.queries.tsv')
            if not os.path.exists(file):
                with util.finialized_file(file, 'wt') as fout:
                    for qid, qtext in self.logger.pbar(plaintext.read_tsv(os.path.join(base_path, 'train.queries.tsv')), desc='filtering queries for train10'):
                        if int(qid) % 10 == 0:
                            plaintext.write_tsv(fout, [(qid, qtext)])

            file = os.path.join(base_path, 'train10.qrels')
            if not os.path.exists(file):
                with util.finialized_file(file, 'wt') as fout, open(os.path.join(base_path, 'train.qrels'), 'rt') as fin:
                    for line in self.logger.pbar(fin, desc='filtering qrels for train10'):
                        qid = line.split()[0]
                        if int(qid) % 10 == 0:
                            fout.write(line)

            if not self.config['init_skip_msrun']:
                file = os.path.join(base_path, 'train10.msrun')
                if not os.path.exists(file):
                    with util.finialized_file(file, 'wt') as fout, open(os.path.join(base_path, 'train.msrun'), 'rt') as fin:
                        for line in self.logger.pbar(fin, desc='filtering msrun for train10'):
                            qid = line.split()[0]
                            if int(qid) % 10 == 0:
                                fout.write(line)

            file = os.path.join(base_path, 'train10.mspairs.gz')
            if not os.path.exists(file):
                with gzip.open(file, 'wt') as fout, gzip.open(os.path.join(base_path, 'train.mspairs.gz'), 'rt') as fin:
                    for qid, did1, did2 in self.logger.pbar(plaintext.read_tsv(fin), desc='filtering mspairs for train10'):
                        if int(qid) % 10 == 0:
                            plaintext.write_tsv(fout, [(qid, did1, did2)])

        if not self.config['init_skip_train_med']:
            med_qids = util.Lazy(lambda: {qid.strip() for qid in util.download_stream('https://raw.githubusercontent.com/Georgetown-IR-Lab/covid-neural-ir/master/med-msmarco-train.txt', 'utf8', expected_md5="dc5199de7d4a872c361f89f08b1163ef")})
            file = os.path.join(base_path, 'train_med.queries.tsv')
            if not os.path.exists(file):
                with util.finialized_file(file, 'wt') as fout:
                    for qid, qtext in self.logger.pbar(plaintext.read_tsv(os.path.join(base_path, 'train.queries.tsv')), desc='filtering queries for train_med'):
                        if qid in med_qids():
                            plaintext.write_tsv(fout, [(qid, qtext)])

            file = os.path.join(base_path, 'train_med.qrels')
            if not os.path.exists(file):
                with util.finialized_file(file, 'wt') as fout, open(os.path.join(base_path, 'train.qrels'), 'rt') as fin:
                    for line in self.logger.pbar(fin, desc='filtering qrels for train_med'):
                        qid = line.split()[0]
                        if qid in med_qids():
                            fout.write(line)

            if not self.config['init_skip_msrun']:
                file = os.path.join(base_path, 'train_med.msrun')
                if not os.path.exists(file):
                    with util.finialized_file(file, 'wt') as fout, open(os.path.join(base_path, 'train.msrun'), 'rt') as fin:
                        for line in self.logger.pbar(fin, desc='filtering msrun for train_med'):
                            qid = line.split()[0]
                            if qid in med_qids():
                                fout.write(line)

            file = os.path.join(base_path, 'train_med.mspairs.gz')
            if not os.path.exists(file):
                with gzip.open(file, 'wt') as fout, gzip.open(os.path.join(base_path, 'train.mspairs.gz'), 'rt') as fin:
                    for qid, did1, did2 in self.logger.pbar(plaintext.read_tsv(fin), desc='filtering mspairs for train_med'):
                        if qid in med_qids():
                            plaintext.write_tsv(fout, [(qid, did1, did2)])
Beispiel #39
0
def tarpackage(files, artifactId):
    tar = tarfile.open("{}.tar.gz".format(artifactId), "w:gz")
    for filename in files:
        tar.add(filename)
    tar.close()
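# Illustrative usage (file names assumed): tarpackage(['target/app.jar', 'README.md'], 'my-artifact')
# produces my-artifact.tar.gz in the current directory.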
Beispiel #40
0
 def _init_iter_collection(self):
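     # the collection TSV is read straight out of the downloaded tarball; nothing is
     # extracted to disk, so documents stream lazily through the generator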
     with util.download_tmp(_SOURCES['collection'], expected_md5=_HASHES['collection']) as f:
         with tarfile.open(fileobj=f) as tarf:
             collection_stream = io.TextIOWrapper(tarf.extractfile('collection.tsv'))
             for did, text in self.logger.pbar(plaintext.read_tsv(collection_stream), desc='documents'):
                 yield indices.RawDoc(did, text)
Beispiel #41
0
def gen_min(
    cachedir,
    extra_mods="",
    overwrite=False,
    so_mods="",
    python2_bin="python2",
    python3_bin="python3",
):
    """
    Generate the salt-min tarball and print the location of the tarball
    Optional additional mods to include (e.g. mako) can be supplied as a comma
    delimited string.  Permits forcing an overwrite of the output file as well.

    CLI Example:

    .. code-block:: bash

        salt-run min.generate
        salt-run min.generate mako
        salt-run min.generate mako,wempy 1
        salt-run min.generate overwrite=1
    """
    mindir = os.path.join(cachedir, "min")
    if not os.path.isdir(mindir):
        os.makedirs(mindir)
    mintar = os.path.join(mindir, "min.tgz")
    minver = os.path.join(mindir, "version")
    pyminver = os.path.join(mindir, ".min-gen-py-version")
    salt_call = os.path.join(mindir, "salt-call")
    with salt.utils.files.fopen(salt_call, "wb") as fp_:
        fp_.write(_get_salt_call())
    if os.path.isfile(mintar):
        if not overwrite:
            if os.path.isfile(minver):
                with salt.utils.files.fopen(minver) as fh_:
                    overwrite = fh_.read() != salt.version.__version__
                if overwrite is False and os.path.isfile(pyminver):
                    with salt.utils.files.fopen(pyminver) as fh_:
                        overwrite = fh_.read() != str(
                            sys.version_info[0]
                        )  # future lint: disable=blacklisted-function
            else:
                overwrite = True

        if overwrite:
            try:
                os.remove(mintar)
            except OSError:
                pass
        else:
            return mintar
    if _six.PY3:
        # Let's check for the minimum python 2 version requirement, 2.6
        py_shell_cmd = (
            python2_bin + " -c 'from __future__ import print_function; import sys; "
            'print("{0}.{1}".format(*(sys.version_info[:2])));\''
        )
        cmd = subprocess.Popen(py_shell_cmd, stdout=subprocess.PIPE, shell=True)
        stdout, _ = cmd.communicate()
        if cmd.returncode == 0:
            py2_version = tuple(
                int(n) for n in stdout.decode("utf-8").strip().split(".")
            )
            if py2_version < (2, 6):
                # Bail!
                raise salt.exceptions.SaltSystemExit(
                    'The minimum required python version to run salt-ssh is "2.6". '
                    'The version reported by "{0}" is "{1}". Please try "salt-ssh '
                    '--python2-bin=<path-to-python-2.6-binary-or-higher>".'.format(
                        python2_bin, stdout.strip()
                    )
                )
    elif sys.version_info < (2, 6):
        # Bail! Though, how did we get this far in the first place?
        raise salt.exceptions.SaltSystemExit(
            'The minimum required python version to run salt-ssh is "2.6".'
        )

    tops_py_version_mapping = {}
    tops = get_tops(extra_mods=extra_mods, so_mods=so_mods)
    if _six.PY2:
        tops_py_version_mapping["2"] = tops
    else:
        tops_py_version_mapping["3"] = tops

    # TODO: Consider putting known py2 and py3 compatible libs in its own sharable directory.
    #       This would reduce the min size.
    if _six.PY2 and sys.version_info[0] == 2:
        # Get python 3 tops
        py_shell_cmd = (
            python3_bin + " -c 'import sys; import json; import salt.utils.thin; "
            "print(json.dumps(salt.utils.thin.get_tops(**(json.loads(sys.argv[1]))), ensure_ascii=False)); exit(0);' "
            "'{0}'".format(
                salt.utils.json.dumps({"extra_mods": extra_mods, "so_mods": so_mods})
            )
        )
        cmd = subprocess.Popen(
            py_shell_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True
        )
        stdout, stderr = cmd.communicate()
        if cmd.returncode == 0:
            try:
                tops = salt.utils.json.loads(stdout)
                tops_py_version_mapping["3"] = tops
            except ValueError:
                pass
    if _six.PY3 and sys.version_info[0] == 3:
        # Get python 2 tops
        py_shell_cmd = (
            python2_bin + " -c 'from __future__ import print_function; "
            "import sys; import json; import salt.utils.thin; "
            "print(json.dumps(salt.utils.thin.get_tops(**(json.loads(sys.argv[1]))), ensure_ascii=False)); exit(0);' "
            "'{0}'".format(
                salt.utils.json.dumps({"extra_mods": extra_mods, "so_mods": so_mods})
            )
        )
        cmd = subprocess.Popen(
            py_shell_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True
        )
        stdout, stderr = cmd.communicate()
        if cmd.returncode == 0:
            try:
                tops = salt.utils.json.loads(stdout.decode("utf-8"))
                tops_py_version_mapping["2"] = tops
            except ValueError:
                pass

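    # dereference=True makes tarfile archive the files that symlinks point to rather
    # than the symlinks themselves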
    tfp = tarfile.open(mintar, "w:gz", dereference=True)
    try:  # cwd may not exist if it was removed but salt was run from it
        start_dir = os.getcwd()
    except OSError:
        start_dir = None
    tempdir = None

    # This is the absolute minimum set of files required to run salt-call
    min_files = (
        "salt/__init__.py",
        "salt/utils",
        "salt/utils/__init__.py",
        "salt/utils/atomicfile.py",
        "salt/utils/validate",
        "salt/utils/validate/__init__.py",
        "salt/utils/validate/path.py",
        "salt/utils/decorators",
        "salt/utils/decorators/__init__.py",
        "salt/utils/cache.py",
        "salt/utils/xdg.py",
        "salt/utils/odict.py",
        "salt/utils/minions.py",
        "salt/utils/dicttrim.py",
        "salt/utils/sdb.py",
        "salt/utils/migrations.py",
        "salt/utils/files.py",
        "salt/utils/parsers.py",
        "salt/utils/locales.py",
        "salt/utils/lazy.py",
        "salt/utils/s3.py",
        "salt/utils/dictupdate.py",
        "salt/utils/verify.py",
        "salt/utils/args.py",
        "salt/utils/kinds.py",
        "salt/utils/xmlutil.py",
        "salt/utils/debug.py",
        "salt/utils/jid.py",
        "salt/utils/openstack",
        "salt/utils/openstack/__init__.py",
        "salt/utils/openstack/swift.py",
        "salt/utils/asynchronous.py",
        "salt/utils/process.py",
        "salt/utils/jinja.py",
        "salt/utils/rsax931.py",
        "salt/utils/context.py",
        "salt/utils/minion.py",
        "salt/utils/error.py",
        "salt/utils/aws.py",
        "salt/utils/timed_subprocess.py",
        "salt/utils/zeromq.py",
        "salt/utils/schedule.py",
        "salt/utils/url.py",
        "salt/utils/yamlencoding.py",
        "salt/utils/network.py",
        "salt/utils/http.py",
        "salt/utils/gzip_util.py",
        "salt/utils/vt.py",
        "salt/utils/templates.py",
        "salt/utils/aggregation.py",
        "salt/utils/yaml.py",
        "salt/utils/yamldumper.py",
        "salt/utils/yamlloader.py",
        "salt/utils/event.py",
        "salt/utils/state.py",
        "salt/serializers",
        "salt/serializers/__init__.py",
        "salt/serializers/yamlex.py",
        "salt/template.py",
        "salt/_compat.py",
        "salt/loader.py",
        "salt/client",
        "salt/client/__init__.py",
        "salt/ext",
        "salt/ext/__init__.py",
        "salt/ext/six.py",
        "salt/ext/ipaddress.py",
        "salt/version.py",
        "salt/syspaths.py",
        "salt/defaults",
        "salt/defaults/__init__.py",
        "salt/defaults/exitcodes.py",
        "salt/renderers",
        "salt/renderers/__init__.py",
        "salt/renderers/jinja.py",
        "salt/renderers/yaml.py",
        "salt/modules",
        "salt/modules/__init__.py",
        "salt/modules/test.py",
        "salt/modules/selinux.py",
        "salt/modules/cmdmod.py",
        "salt/modules/saltutil.py",
        "salt/minion.py",
        "salt/pillar",
        "salt/pillar/__init__.py",
        "salt/utils/textformat.py",
        "salt/log",
        "salt/log/__init__.py",
        "salt/log/handlers",
        "salt/log/handlers/__init__.py",
        "salt/log/mixins.py",
        "salt/log/setup.py",
        "salt/cli",
        "salt/cli/__init__.py",
        "salt/cli/caller.py",
        "salt/cli/daemons.py",
        "salt/cli/salt.py",
        "salt/cli/call.py",
        "salt/fileserver",
        "salt/fileserver/__init__.py",
        "salt/transport",
        "salt/transport/__init__.py",
        "salt/transport/client.py",
        "salt/exceptions.py",
        "salt/grains",
        "salt/grains/__init__.py",
        "salt/grains/extra.py",
        "salt/scripts.py",
        "salt/state.py",
        "salt/fileclient.py",
        "salt/crypt.py",
        "salt/config.py",
        "salt/beacons",
        "salt/beacons/__init__.py",
        "salt/payload.py",
        "salt/output",
        "salt/output/__init__.py",
        "salt/output/nested.py",
    )

    for py_ver, tops in _six.iteritems(tops_py_version_mapping):
        for top in tops:
            base = os.path.basename(top)
            top_dirname = os.path.dirname(top)
            if os.path.isdir(top_dirname):
                os.chdir(top_dirname)
            else:
                # This is likely a compressed python .egg
                tempdir = tempfile.mkdtemp()
                egg = zipfile.ZipFile(top_dirname)
                egg.extractall(tempdir)
                top = os.path.join(tempdir, base)
                os.chdir(tempdir)
            if not os.path.isdir(top):
                # top is a single file module
                tfp.add(base, arcname=os.path.join("py{0}".format(py_ver), base))
                continue
            for root, dirs, files in salt.utils.path.os_walk(base, followlinks=True):
                for name in files:
                    if name.endswith((".pyc", ".pyo")):
                        continue
                    if (
                        root.startswith("salt")
                        and os.path.join(root, name) not in min_files
                    ):
                        continue
                    tfp.add(
                        os.path.join(root, name),
                        arcname=os.path.join("py{0}".format(py_ver), root, name),
                    )
            if tempdir is not None:
                shutil.rmtree(tempdir)
                tempdir = None

    os.chdir(mindir)
    tfp.add("salt-call")
    with salt.utils.files.fopen(minver, "w+") as fp_:
        fp_.write(salt.version.__version__)
    with salt.utils.files.fopen(pyminver, "w+") as fp_:
        fp_.write(str(sys.version_info[0]))  # future lint: disable=blacklisted-function
    os.chdir(os.path.dirname(minver))
    tfp.add("version")
    tfp.add(".min-gen-py-version")
    if start_dir:
        os.chdir(start_dir)
    tfp.close()
    return mintar
Beispiel #42
0
    'data/8.4-tarfile_bad-example.tar',
    'data/8.4-tarfile_does-not-exist',
]
for filename in filenames:
    try:
        print '%35s %s' % (filename, tarfile.is_tarfile(filename))
    except IOError, err:
        print '%35s %s' % (filename, err)
print

## 8.4.2 Reading Metadata from an Archive
# Use the TarFile class to work directly on a tar archive.
# It supports reading data about files, as well as modifying archives
# use getnames() to read the names of the files in the archive

with closing(tarfile.open(filenames[1], 'r')) as tf:
    for index, name in enumerate(tf.getnames()):
        print 'File%3s:' % index, name

    # In addition to names, metadata is available as instances of TarInfo objects
    fmt = "\t{:8}:\t{}"
    for member_info in tf.getmembers():
        print member_info.name
        print fmt.format("Modified", time.ctime(member_info.mtime))
        print fmt.format("Mode", oct(member_info.mode))
        print fmt.format("Type", member_info.type)
        print fmt.format("Size", member_info.size), "bytes"
        print

    # Or, if the name is known in advance, it can be asked for directly
    print "Single lookup:"
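    # (illustrative continuation -- the member name below is assumed, not taken from
    # the original archive)
    info = tf.getmember('README.txt')
    print '%s is %d bytes' % (info.name, info.size)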
import skimage.io
import skimage.transform


print("fme")
from tensorflow.examples.tutorials.mnist import input_data
print("laoding data")
mnist = input_data.read_data_sets('MNIST_data')

print("fme2")
BST_PATH = 'BSR_bsds500.tar'#gz'
directory = 'BSR/BSDS500/data/images/train/'

rand = np.random.RandomState(42)

f = tarfile.open(BST_PATH)
train_files = []
for name in f.getnames():
    if name.startswith('BSR/BSDS500/data/images/train/'):
        train_files.append(name)

print('Loading BSR training images')
#for name in os.listdir(directory):
#    train_files.append(name)

#print('Loading BSR training images')
background_data = []
for name in train_files:
    try:
        fp = f.extractfile(name)
        #fp = open(directory + name, 'r')
def extractFile(file_name):
    if (file_name.endswith("tar.gz")):
        print('Extract...')
        tar = tarfile.open(file_name, "r:gz")
        tar.extractall()
        tar.close()
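# A minimal variant (illustrative, not from the snippet above): extract into an explicit
# target directory instead of the current working directory.
import tarfile


def extract_file_to(file_name, target_dir="."):
    if file_name.endswith("tar.gz"):
        with tarfile.open(file_name, "r:gz") as tar:
            tar.extractall(path=target_dir)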
Beispiel #45
0
def iscore_svm(train=False,train_class='caseID.lst',trainID=None,testID=None,
				kernel='./kernel/',save_model='svm_model.pkl',load_model=None,
				package_model=False,package_name=None,graph='./graph/',
				include_kernel=False, maxlen = None,score_file='iScorePredict'):
	"""Function called in the binary iScore.predict and iScore.train

	Args:
	    train (bool, optional): train or predict
	    train_class (str, optional): file name containing the ID and classes of the train set
	    trainID (None, optional): file containing the ID of the train set
	    testID (None, optional): file containing the ID of the test set
	    kernel (str, optional): directory containing the kernel files
	    save_model (str, optional): save the model in a pickle file after training
	    load_model (None, optional): load a model for testing
	    package_model (bool, optional): Create an archive file containing the training set
	    package_name (None, optional): Name of the archive file
	    graph (str, optional): directory containing the graphs
	    include_kernel (bool, optional): Include the kernels in the archive file
	    maxlen (None, optional): maximum walk length
	    score_file (str, optional): output file containing the prediction

	Raises:
	    ValueError: If the kernel files are not found
	"""

	# figure out the kernel files
	# if a dir was given all the file in that dir are considered
	if os.path.isdir(kernel):
		Kfile =  [kernel + f for f in os.listdir(kernel)]
	elif os.path.isfile(kernel):
		Kfile = kernel
	else:
		raise ValueError('Kernel file not found')

	# train the model
	if train:

		traindata = DataSet(train_class,Kfile,maxlen)
		svm = SVM(trainDataSet=traindata)
		svm.train(model_file_name=save_model)

		if package_model:
			print('Create Archive file : ', package_name)
			svm.archive(graph_path=graph,
				        kernel_path=kernel,
				        include_kernel=include_kernel,
				        model_name=package_name)

	# use a trained model for prediction
	else:

		if trainID is None:
			tar = tarfile.open(package_name)
			members = tar.getmembers()
			trainID = [os.path.splitext(os.path.basename(m.name))[0] for m in members if m.name.startswith('./graph/')]

		if testID is None:
			testID = [os.path.splitext(n)[0] for n in os.listdir('./graph/')]

		testdata = DataSet(trainID,Kfile,maxlen,testID=testID)
		svm = SVM(testDataSet = testdata)
		svm.predict(package_name = package_name)
		svm.export_prediction(score_file)
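# Illustrative prediction call (paths and archive name assumed):
#   iscore_svm(train=False, package_name='training_set.tar.gz', kernel='./kernel/')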
Beispiel #46
0
def gen_thin(
    cachedir,
    extra_mods="",
    overwrite=False,
    so_mods="",
    python2_bin="python2",
    python3_bin="python3",
    absonly=True,
    compress="gzip",
    extended_cfg=None,
):
    """
    Generate the salt-thin tarball and print the location of the tarball
    Optional additional mods to include (e.g. mako) can be supplied as a comma
    delimited string.  Permits forcing an overwrite of the output file as well.

    CLI Example:

    .. code-block:: bash

        salt-run thin.generate
        salt-run thin.generate mako
        salt-run thin.generate mako,wempy 1
        salt-run thin.generate overwrite=1
    """
    if sys.version_info < (2, 6):
        raise salt.exceptions.SaltSystemExit(
            'The minimum required python version to run salt-ssh is "2.6".'
        )
    if compress not in ["gzip", "zip"]:
        log.warning(
            'Unknown compression type: "%s". Falling back to "gzip" compression.',
            compress,
        )
        compress = "gzip"

    thindir = os.path.join(cachedir, "thin")
    if not os.path.isdir(thindir):
        os.makedirs(thindir)
    thintar = os.path.join(thindir, "thin." + (compress == "gzip" and "tgz" or "zip"))
    thinver = os.path.join(thindir, "version")
    pythinver = os.path.join(thindir, ".thin-gen-py-version")
    salt_call = os.path.join(thindir, "salt-call")
    pymap_cfg = os.path.join(thindir, "supported-versions")
    code_checksum = os.path.join(thindir, "code-checksum")
    digest_collector = salt.utils.hashutils.DigestCollector()

    with salt.utils.files.fopen(salt_call, "wb") as fp_:
        fp_.write(_get_salt_call("pyall", **_get_ext_namespaces(extended_cfg)))

    if os.path.isfile(thintar):
        if not overwrite:
            if os.path.isfile(thinver):
                with salt.utils.files.fopen(thinver) as fh_:
                    overwrite = fh_.read() != salt.version.__version__
                if overwrite is False and os.path.isfile(pythinver):
                    with salt.utils.files.fopen(pythinver) as fh_:
                        overwrite = fh_.read() != str(
                            sys.version_info[0]
                        )  # future lint: disable=blacklisted-function
            else:
                overwrite = True

        if overwrite:
            try:
                log.debug("Removing %s archive file", thintar)
                os.remove(thintar)
            except OSError as exc:
                log.error("Error while removing %s file: %s", thintar, exc)
                if os.path.exists(thintar):
                    raise salt.exceptions.SaltSystemExit(
                        "Unable to remove {} file. See logs for details.".format(
                            thintar
                        )
                    )
        else:
            return thintar
    if _six.PY3:
        # Let's check for the minimum python 2 version requirement, 2.6
        if not salt.utils.path.which(python2_bin):
            log.debug(
                "%s binary does not exist. Will not detect Python 2 version",
                python2_bin,
            )
        else:
            py_shell_cmd = "{} -c 'import sys;sys.stdout.write(\"%s.%s\\n\" % sys.version_info[:2]);'".format(
                python2_bin
            )
            cmd = subprocess.Popen(py_shell_cmd, stdout=subprocess.PIPE, shell=True)
            stdout, _ = cmd.communicate()
            if cmd.returncode == 0:
                py2_version = tuple(
                    int(n) for n in stdout.decode("utf-8").strip().split(".")
                )
                if py2_version < (2, 6):
                    raise salt.exceptions.SaltSystemExit(
                        'The minimum required python version to run salt-ssh is "2.6". '
                        'The version reported by "{0}" is "{1}". Please try "salt-ssh '
                        '--python2-bin=<path-to-python-2.6-binary-or-higher>".'.format(
                            python2_bin, stdout.strip()
                        )
                    )
            else:
                log.debug("Unable to detect %s version", python2_bin)
                log.debug(stdout)

    tops_failure_msg = "Failed %s tops for Python binary %s."
    python_check_msg = (
        "%s binary does not exist. Will not attempt to generate tops for Python %s"
    )
    tops_py_version_mapping = {}
    tops = get_tops(extra_mods=extra_mods, so_mods=so_mods)
    tops_py_version_mapping[sys.version_info.major] = tops

    # Collect tops, alternative to 2.x version
    if _six.PY2 and sys.version_info.major == 2:
        # Get python 3 tops
        if not salt.utils.path.which(python3_bin):
            log.debug(python_check_msg, python3_bin, "3")
        else:
            py_shell_cmd = "{0} -c 'import salt.utils.thin as t;print(t.gte())' '{1}'".format(
                python3_bin,
                salt.utils.json.dumps({"extra_mods": extra_mods, "so_mods": so_mods}),
            )
            cmd = subprocess.Popen(
                py_shell_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True
            )
            stdout, stderr = cmd.communicate()
            if cmd.returncode == 0:
                try:
                    tops = salt.utils.json.loads(stdout)
                    tops_py_version_mapping["3"] = tops
                except ValueError as err:
                    log.error(tops_failure_msg, "parsing", python3_bin)
                    log.exception(err)
            else:
                log.debug(tops_failure_msg, "collecting", python3_bin)
                log.debug(stderr)

    # Collect tops, alternative to 3.x version
    if _six.PY3 and sys.version_info.major == 3:
        # Get python 2 tops
        if not salt.utils.path.which(python2_bin):
            log.debug(python_check_msg, python2_bin, "2")
        else:
            py_shell_cmd = "{0} -c 'import salt.utils.thin as t;print(t.gte())' '{1}'".format(
                python2_bin,
                salt.utils.json.dumps({"extra_mods": extra_mods, "so_mods": so_mods}),
            )
            cmd = subprocess.Popen(
                py_shell_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True
            )
            stdout, stderr = cmd.communicate()
            if cmd.returncode == 0:
                try:
                    tops = salt.utils.json.loads(stdout.decode("utf-8"))
                    tops_py_version_mapping["2"] = tops
                except ValueError as err:
                    log.error(tops_failure_msg, "parsing", python2_bin)
                    log.exception(err)
            else:
                log.debug(tops_failure_msg, "collecting", python2_bin)
                log.debug(stderr)

    with salt.utils.files.fopen(pymap_cfg, "wb") as fp_:
        fp_.write(
            _get_supported_py_config(
                tops=tops_py_version_mapping, extended_cfg=extended_cfg
            )
        )

    tmp_thintar = _get_thintar_prefix(thintar)
    if compress == "gzip":
        tfp = tarfile.open(tmp_thintar, "w:gz", dereference=True)
    elif compress == "zip":
        tfp = zipfile.ZipFile(
            tmp_thintar,
            "w",
            compression=zlib and zipfile.ZIP_DEFLATED or zipfile.ZIP_STORED,
        )
        tfp.add = tfp.write
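        # ZipFile.write(filename, arcname=...) accepts the same arguments the packing
        # loop passes to TarFile.add(), so aliasing it lets one code path fill either format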
    try:  # cwd may not exist if it was removed but salt was run from it
        start_dir = os.getcwd()
    except OSError:
        start_dir = None
    tempdir = None

    # Pack default data
    log.debug("Packing default libraries based on current Salt version")
    for py_ver, tops in _six.iteritems(tops_py_version_mapping):
        for top in tops:
            if absonly and not os.path.isabs(top):
                continue
            base = os.path.basename(top)
            top_dirname = os.path.dirname(top)
            if os.path.isdir(top_dirname):
                os.chdir(top_dirname)
            else:
                # This is likely a compressed python .egg
                tempdir = tempfile.mkdtemp()
                egg = zipfile.ZipFile(top_dirname)
                egg.extractall(tempdir)
                top = os.path.join(tempdir, base)
                os.chdir(tempdir)

            site_pkg_dir = _is_shareable(base) and "pyall" or "py{}".format(py_ver)

            log.debug('Packing "%s" to "%s" destination', base, site_pkg_dir)
            if not os.path.isdir(top):
                # top is a single file module
                if os.path.exists(os.path.join(top_dirname, base)):
                    tfp.add(base, arcname=os.path.join(site_pkg_dir, base))
                continue
            for root, dirs, files in salt.utils.path.os_walk(base, followlinks=True):
                for name in files:
                    if not name.endswith((".pyc", ".pyo")):
                        digest_collector.add(os.path.join(root, name))
                        arcname = os.path.join(site_pkg_dir, root, name)
                        if hasattr(tfp, "getinfo"):
                            try:
                                # This is a little slow but there's no clear way to detect duplicates
                                tfp.getinfo(os.path.join(site_pkg_dir, root, name))
                                arcname = None
                            except KeyError:
                                log.debug(
                                    'ZIP: Unable to add "%s" with "getinfo"', arcname
                                )
                        if arcname:
                            tfp.add(os.path.join(root, name), arcname=arcname)

            if tempdir is not None:
                shutil.rmtree(tempdir)
                tempdir = None

    if extended_cfg:
        log.debug("Packing libraries based on alternative Salt versions")
        _pack_alternative(extended_cfg, digest_collector, tfp)

    os.chdir(thindir)
    with salt.utils.files.fopen(thinver, "w+") as fp_:
        fp_.write(salt.version.__version__)
    with salt.utils.files.fopen(pythinver, "w+") as fp_:
        fp_.write(
            str(sys.version_info.major)
        )  # future lint: disable=blacklisted-function
    with salt.utils.files.fopen(code_checksum, "w+") as fp_:
        fp_.write(digest_collector.digest())
    os.chdir(os.path.dirname(thinver))

    for fname in [
        "version",
        ".thin-gen-py-version",
        "salt-call",
        "supported-versions",
        "code-checksum",
    ]:
        tfp.add(fname)

    if start_dir and os.access(start_dir, os.R_OK) and os.access(start_dir, os.X_OK):
        os.chdir(start_dir)
    tfp.close()

    shutil.move(tmp_thintar, thintar)

    return thintar
Beispiel #47
0
def main(args):
    output = args[0]
    tail = args[1]
    with tarfile.open(output, 'w:') as tf:
        for f in find_gcno(os.getcwd(), tail):
            tf.add(f)
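# Illustrative invocation (arguments assumed): main(['gcno.tar', '.gcno']) packs every
# file reported by find_gcno() under the current directory into gcno.tar.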
        (im_height, im_width, 3)).astype(np.uint8)


for mid in range(len(model_names)):
    cap = cv2.VideoCapture('input.avi')  # Change only if you have more than one webcams
    MODEL_NAME = model_names[mid]
    out = cv2.VideoWriter(MODEL_NAME+'.avi',fourcc, 20.0, (640,480))
    MODEL_BASE = 'frozen_model'
    MODEL_FILE = os.path.join(MODEL_BASE, MODEL_NAME + '.tar.gz')
    DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

    # Path to frozen detection graph. This is the actual model that is used for the object detection.
    PATH_TO_CKPT = os.path.join(MODEL_BASE, MODEL_NAME, 'frozen_inference_graph.pb')


    tar_file = tarfile.open(MODEL_FILE)
    for file in tar_file.getmembers():
        file_name = os.path.basename(file.name)
        if 'frozen_inference_graph.pb' in file_name:
            tar_file.extract(file, os.path.join(os.getcwd(),MODEL_BASE))


    # Load a (frozen) Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
    def handle(self, *args, **options):
        from chroma_core.lib import service_config
        from chroma_core.models import Bundle

        sc = service_config.ServiceConfig()
        sc._setup_rabbitmq_credentials()
        sc._setup_crypto()
        sc._syncdb()

        # default, works for --no-bundles
        profile_path = os.path.join(site_dir(), "../chroma-bundles/base_managed_RH7.profile")

        if options['no_bundles']:
            for bundle in ['iml-agent', 'external']:
                Bundle.objects.get_or_create(bundle_name=bundle, location="/tmp/", description="Dummy bundle")
        else:
            # override the default path if we have unpacked a real archive
            repo_profile_path = os.path.join(settings.DEV_REPO_PATH, 'base_managed_RH7.profile')
            if os.path.isfile(repo_profile_path):
                profile_path = repo_profile_path

            with open(profile_path) as f:
                bundle_names = json.load(f)['bundles']
            missing_bundles = bundle_names

            bundle_files = glob.glob(os.path.join(settings.DEV_REPO_PATH, "*-bundle.tar.gz"))
            for bundle_file in bundle_files:
                archive = tarfile.open(bundle_file, "r:gz")
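                # read the bundle's metadata straight out of the tarball without
                # extracting it to disk first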
                meta = json.load(archive.extractfile("./meta"))
                repo = os.path.join(settings.DEV_REPO_PATH, meta['name'])

                if not os.path.exists(os.path.join(repo, 'meta')):
                    print "Extracting %s" % meta['name']
                    if not os.path.exists(repo):
                        os.makedirs(repo)

                    archive.extractall(repo)
                archive.close()

                if not Bundle.objects.filter(location=repo).exists():
                    service_config.bundle('register', repo)

                try:
                    missing_bundles.remove(meta['name'])
                except ValueError:
                    # Bundles not associated with a profile are OK
                    pass

            if len(missing_bundles):
                print """
Missing bundles: %(bundles)s

Package bundles are required for installation. In order to proceed, you
have 2 options:
    1. Download an installer from %(bundle_url)s and unpack it in %(repo_path)s
    2. Build an installer locally and unpack it in %(repo_path)s

Please note that the fake bundles can't be used to install real storage
servers -- you'll need to use one of the two methods above in order to make
that work.
    """ % {'bundle_url': "http://jenkins.lotus.hpdd.lab.intel.com/job/manager-for-lustre/arch=x86_64,distro=el7/lastSuccessfulBuild/artifact/chroma-bundles/", 'repo_path': settings.DEV_REPO_PATH, 'bundles': ", ".join(missing_bundles)}
                sys.exit(1)

        for profile_path in glob.glob(os.path.join(os.path.dirname(profile_path), '*.profile')):
            with open(profile_path) as profile_file:
                service_config.register_profile(profile_file)

        print """Great success:
 * run `systemctl start iml-manager.target`
 * open %s""" % settings.SERVER_HTTP_URL
Beispiel #50
0
                s.send(b'close')
                break

            # Stop the recording
            if has_data and datadec.find('stop') != -1 and writer is not None:
                sys.stdout.write(
                    "OPENMV: Stopping video, creating tarfile... ")
                sys.stdout.flush()
                writer.release()
                del (writer)
                writer = None
                f_timestamps.flush()
                f_timestamps.close()
                f_timestamps = None
                tarf = fn_timestamps[:-len(".timestamps")] + ".tar"
                with tarfile.open(tarf, 'w') as tar:
                    tar.add(tarf[:-len(".tar")])

                shutil.rmtree(tarf[:-len(".tar")])
                sys.stdout.write("done")

                # cmd = ['a2mp4.sh', '%s' % fn, '%s.mp4' % fn[:-4]]
                # proc = subprocess.Popen(cmd, bufsize=-1)
            if has_data and datadec.find('avi') != -1 and datadec.find(
                    'stop') == -1:
                if writer is None:
                    fn = datadec[datadec.find("begin") + 5:datadec.find("end")]
                    os.makedirs(fn)
                    fn_timestamps = fn + ".timestamps"
                    fn = os.path.join(fn, r'%09d.jpg')
                    print("OPENMV: Starting video " + fn)
Beispiel #51
0
    def from_pretrained(cls, pretrained_model_name_or_path, state_dict=None, cache_dir=None,
                        from_tf=False, *inputs, **kwargs):
        """
        Instantiate a BertPreTrainedModel from a pre-trained model file or a pytorch state dict.
        Download and cache the pre-trained model file if needed.

        Params:
            pretrained_model_name_or_path: either:
                - a str with the name of a pre-trained model to load selected in the list of:
                    . `bert-base-uncased`
                    . `bert-large-uncased`
                    . `bert-base-cased`
                    . `bert-large-cased`
                    . `bert-base-multilingual-uncased`
                    . `bert-base-multilingual-cased`
                    . `bert-base-chinese`
                - a path or url to a pretrained model archive containing:
                    . `bert_config.json` a configuration file for the model
                    . `pytorch_model.bin` a PyTorch dump of a BertForPreTraining instance
                - a path or url to a pretrained model archive containing:
                    . `bert_config.json` a configuration file for the model
                    . `model.chkpt` a TensorFlow checkpoint
            from_tf: should we load the weights from a locally saved TensorFlow checkpoint
            cache_dir: an optional path to a folder in which the pre-trained models will be cached.
            state_dict: an optional state dictionnary (collections.OrderedDict object) to use instead of Google pre-trained models
            *inputs, **kwargs: additional input for the specific Bert class
                (ex: num_labels for BertForSequenceClassification)
        """
        if pretrained_model_name_or_path in PRETRAINED_MODEL_ARCHIVE_MAP:
            archive_file = PRETRAINED_MODEL_ARCHIVE_MAP[pretrained_model_name_or_path]
        else:
            archive_file = pretrained_model_name_or_path
        # redirect to the cache, if necessary
        try:
            resolved_archive_file = cached_path(archive_file, cache_dir=cache_dir)
        except EnvironmentError:
            logger.error(
                "Model name '{}' was not found in model name list ({}). "
                "We assumed '{}' was a path or url but couldn't find any file "
                "associated to this path or url.".format(
                    pretrained_model_name_or_path,
                    ', '.join(PRETRAINED_MODEL_ARCHIVE_MAP.keys()),
                    archive_file))
            return None
        if resolved_archive_file == archive_file:
            logger.info("loading archive file {}".format(archive_file))
        else:
            logger.info("loading archive file {} from cache at {}".format(
                archive_file, resolved_archive_file))
        tempdir = None
        if os.path.isdir(resolved_archive_file) or from_tf:
            serialization_dir = resolved_archive_file
        else:
            # Extract archive to temp dir
            tempdir = tempfile.mkdtemp()
            logger.info("extracting archive file {} to temp dir {}".format(
                resolved_archive_file, tempdir))
            with tarfile.open(resolved_archive_file, 'r:gz') as archive:
                archive.extractall(tempdir)
            serialization_dir = tempdir
        # Load config
        config_file = os.path.join(serialization_dir, CONFIG_NAME)
        config = BertConfig.from_json_file(config_file)
        logger.info("Model config {}".format(config))
        # Instantiate model.
        model = cls(config, *inputs, **kwargs)
        if state_dict is None and not from_tf:
            weights_path = os.path.join(serialization_dir, WEIGHTS_NAME)
            state_dict = torch.load(weights_path, map_location='cpu' if not torch.cuda.is_available() else None)
        if tempdir:
            # Clean up temp dir
            shutil.rmtree(tempdir)
        if from_tf:
            # Directly load from a TensorFlow checkpoint
            weights_path = os.path.join(serialization_dir, TF_WEIGHTS_NAME)
            return load_tf_weights_in_bert(model, weights_path)
        # Load from a PyTorch state_dict
        old_keys = []
        new_keys = []
        for key in state_dict.keys():
            new_key = None
            if 'gamma' in key:
                new_key = key.replace('gamma', 'weight')
            if 'beta' in key:
                new_key = key.replace('beta', 'bias')
            if new_key:
                old_keys.append(key)
                new_keys.append(new_key)
        for old_key, new_key in zip(old_keys, new_keys):
            state_dict[new_key] = state_dict.pop(old_key)

        missing_keys = []
        unexpected_keys = []
        error_msgs = []
        # copy state_dict so _load_from_state_dict can modify it
        metadata = getattr(state_dict, '_metadata', None)
        state_dict = state_dict.copy()
        if metadata is not None:
            state_dict._metadata = metadata

        def load(module, prefix=''):
            local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {})
            module._load_from_state_dict(
                state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs)
            for name, child in module._modules.items():
                if child is not None:
                    load(child, prefix + name + '.')
        start_prefix = ''
        if not hasattr(model, 'bert') and any(s.startswith('bert.') for s in state_dict.keys()):
            start_prefix = 'bert.'
        load(model, prefix=start_prefix)
        if len(missing_keys) > 0:
            logger.info("Weights of {} not initialized from pretrained model: {}".format(
                model.__class__.__name__, missing_keys))
        if len(unexpected_keys) > 0:
            logger.info("Weights from pretrained model not used in {}: {}".format(
                model.__class__.__name__, unexpected_keys))
        if len(error_msgs) > 0:
            raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
                               model.__class__.__name__, "\n\t".join(error_msgs)))
        return model
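A minimal usage sketch for the classmethod above, assuming the usual pytorch_pretrained_bert package layout; the model name comes from the list in the docstring, and num_labels is the example keyword the docstring itself mentions:

from pytorch_pretrained_bert import BertForSequenceClassification   # assumed import path

# 'bert-base-uncased' is one of the archive names listed in the docstring above;
# num_labels=2 is illustrative.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
model.eval()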
Example #52
0
import argparse, tarfile, json
import numpy as np
import matplotlib.pyplot as plt

parser = argparse.ArgumentParser(description="Plot cancellation results from a .tar datafile generated from a BM test.")
parser.add_argument(type=str, dest="datafile", help="Tar file with the data to plot.")
args = parser.parse_args()

tar_datafile = tarfile.open(args.datafile)
testinfo_json = tar_datafile.extractfile("testinfo.json")
testinfo = json.load(testinfo_json)

freqs = np.linspace(0, testinfo['bw'], testinfo['nchannels'], endpoint=False)

for lo_comb in testinfo['lo_combinations']:
    plt.figure()
    datadir = '_'.join(['LO'+str(i+1)+'_'+str(lo/1e3)+'GHZ' for i,lo in enumerate(lo_comb)]) 
    cancellation_datafile = tar_datafile.extractfile(datadir + '/cancellation.npz')
    cancellation_data = np.load(cancellation_datafile)

    usb_freqs = lo_comb[0]/1.0e3 + sum(lo_comb[1:])/1.0e3 + freqs
        
    plt.plot(usb_freqs, cancellation_data['uncalibrated'], label='Uncalibrated')
    plt.plot(usb_freqs, cancellation_data['ideal'], label='Ideal Constants')
    plt.plot(usb_freqs, cancellation_data['calibrated'], label='Calibrated Constants')
    plt.grid()
    plt.xlabel('Frequency [GHz]')
    plt.ylabel('Power [dB]')
    plt.gcf().canvas.set_window_title(datadir)
    plt.legend()
    #plt.savefig(datadir+'.png')
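A hedged invocation sketch for the script above; the script and data file names are placeholders, and a trailing plt.show() is assumed to be needed so the figures actually appear when it is run non-interactively:

# Hypothetical invocation (file names are placeholders):
#   python plot_cancellation.py bm_test_results.tar
plt.show()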
Example #53
0
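The snippet below uses several names it never defines (all_file, Ext_inside, ext_outside, list_ext, tar_list, plus the os and tarfile imports). One plausible, purely illustrative set of definitions, inferred only from how each name is used:

import os
import tarfile

all_file = ["pkg/query.py", "pkg/util.py"]     # assumed: modules to cythonize
Ext_inside = 'Extension("{}", ["{}"]),\n'      # assumed: one Extension(...) entry per module
ext_outside = 'extensions = [\n{}]'            # assumed: wraps the entries into a list literal
list_ext = ""                                  # accumulated Extension entries
tar_list = []                                  # compiled .so files to archive below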
for files in all_file:
    files_new = files.split(".")[0].replace("/", ".")
    Ext_name = Ext_inside.format(files_new, files)
    list_ext = "".join(list_ext + Ext_name)
    tar_file = "".join(files.split(".")[0] + ".so")
    tar_list.append(tar_file)

ext_outside_all = ext_outside.format(list_ext)
setup_py = """
from distutils.core import setup
from distutils.extension import Extension
from Cython.Build import cythonize
{}
setup(
    name = "lib"
    ext_modules = cythonize(extensions),
)
""".format(ext_outside_all)

with open("setup.py", "w") as f:
    f.write(setup_py)

os.system("python setup.py build_ext --inplace")
os.system("rm -rf logs/*")

tar_files = tarfile.open("get_sql.gz", "w:gz")
for files in tar_list:
    tar_files.add(files)
tar_files.close()
Example #54
0
def compile_data(tmp_dir, datasets, filename):
  """Concatenate all `datasets` and save to `filename`."""
  filename = os.path.join(tmp_dir, filename)
  lang1_fname = filename + ".lang1"
  lang2_fname = filename + ".lang2"
  if tf.gfile.Exists(lang1_fname) and tf.gfile.Exists(lang2_fname):
    tf.logging.info("Skipping compile data, found files:\n%s\n%s", lang1_fname,
                    lang2_fname)
    return filename
  with tf.gfile.GFile(lang1_fname, mode="w") as lang1_resfile:
    with tf.gfile.GFile(lang2_fname, mode="w") as lang2_resfile:
      for dataset in datasets:
        url = dataset[0]
        compressed_filename = os.path.basename(url)
        compressed_filepath = os.path.join(tmp_dir, compressed_filename)
        if url.startswith("http"):
          generator_utils.maybe_download(tmp_dir, compressed_filename, url)

        if dataset[1][0] == "tsv":
          _, src_column, trg_column, glob_pattern = dataset[1]
          filenames = tf.gfile.Glob(os.path.join(tmp_dir, glob_pattern))
          if not filenames:
            # Capture *.tgz and *.tar.gz too.
            mode = "r:gz" if compressed_filepath.endswith("gz") else "r"
            with tarfile.open(compressed_filepath, mode) as corpus_tar:
              corpus_tar.extractall(tmp_dir)
            filenames = tf.gfile.Glob(os.path.join(tmp_dir, glob_pattern))
          for tsv_filename in filenames:
            if tsv_filename.endswith(".gz"):
              new_filename = tsv_filename.strip(".gz")
              generator_utils.gunzip_file(tsv_filename, new_filename)
              tsv_filename = new_filename
            with tf.gfile.Open(tsv_filename) as tsv_file:
              for line in tsv_file:
                if line and "\t" in line:
                  parts = line.split("\t")
                  source, target = parts[src_column], parts[trg_column]
                  source, target = source.strip(), target.strip()
                  if source and target:
                    lang1_resfile.write(source)
                    lang1_resfile.write("\n")
                    lang2_resfile.write(target)
                    lang2_resfile.write("\n")
        else:
          lang1_filename, lang2_filename = dataset[1]
          lang1_filepath = os.path.join(tmp_dir, lang1_filename)
          lang2_filepath = os.path.join(tmp_dir, lang2_filename)
          is_sgm = (
              lang1_filename.endswith("sgm") and lang2_filename.endswith("sgm"))

          if not (tf.gfile.Exists(lang1_filepath) and
                  tf.gfile.Exists(lang2_filepath)):
            # For .tar.gz and .tgz files, we read compressed.
            mode = "r:gz" if compressed_filepath.endswith("gz") else "r"
            with tarfile.open(compressed_filepath, mode) as corpus_tar:
              corpus_tar.extractall(tmp_dir)
          if lang1_filepath.endswith(".gz"):
            new_filepath = lang1_filepath.strip(".gz")
            generator_utils.gunzip_file(lang1_filepath, new_filepath)
            lang1_filepath = new_filepath
          if lang2_filepath.endswith(".gz"):
            new_filepath = lang2_filepath.strip(".gz")
            generator_utils.gunzip_file(lang2_filepath, new_filepath)
            lang2_filepath = new_filepath

          for example in text_problems.text2text_txt_iterator(
              lang1_filepath, lang2_filepath):
            line1res = _preprocess_sgm(example["inputs"], is_sgm)
            line2res = _preprocess_sgm(example["targets"], is_sgm)
            if line1res and line2res:
              lang1_resfile.write(line1res)
              lang1_resfile.write("\n")
              lang2_resfile.write(line2res)
              lang2_resfile.write("\n")

  return filename
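The shape of the `datasets` entries the function above expects can be read off its two branches: each entry pairs a download URL with either a ("tsv", source column, target column, glob pattern) tuple or a (lang1_filename, lang2_filename) pair. An illustrative sketch (URLs, archive names, and member names are made up):

# Illustrative only -- these URLs and member names are placeholders.
_EXAMPLE_DATASETS = [
    # plain parallel text files inside the downloaded archive
    ["http://example.com/corpus-de-en.tgz",
     ("corpus.de-en.en", "corpus.de-en.de")],
    # tab-separated corpus: ("tsv", source column, target column, glob pattern)
    ["http://example.com/tsv-corpus-de-en.tgz",
     ("tsv", 1, 2, "corpus.de-en.tsv*")],
]
# compile_data(tmp_dir, _EXAMPLE_DATASETS, "translate_deen_train")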
Example #55
0
    def test_tar_device_file(self):
        with tarfile.open('./tests/data/clean.tar', 'w') as zout:
            zout.add('/dev/null')
        with self.assertRaises(ValueError):
            archive.TarParser('./tests/data/clean.tar')
        os.remove('./tests/data/clean.tar')
Example #56
0
def main(args):

    logging.basicConfig()

    # debug
    if args.debug:
        logger.setLevel(logging.DEBUG)

    db_uri = 'mongodb://'+args.db_host+':27017/lophi_db'
    DB = MongoDb(db_uri)
    analysis_datastore = datastore.Datastore(db_uri+G.DB_ANALYSES)
    files_datastore = datastore.Datastore(db_uri+'/fs.files')

    results = analysis_datastore.db.collection.find({'status':'COMPLETED',
                                                     "sample":args.sample_id,
                                                     'machine_type':int(args.machine_type)})

    logger.info("Number of completed analyses for sample id %s : %d" % (args.sample_id, results.count()))

    if not os.path.exists(args.out_dir):
        os.mkdir(args.out_dir)

    for analysis_doc in results:
        analysis_id = analysis_doc['_id']
        logger.info("Downloading files for analysis id %s" % analysis_id)

        outdir_path = os.path.join(args.out_dir, analysis_id)
        if not os.path.exists(outdir_path):
            os.mkdir(outdir_path)
        else:
            logger.info("Analysis directory already exists, skipping.")
            continue

        # write the analysis doc
        analysis_doc_f = open(os.path.join(outdir_path, 'analysis_doc'), 'w')
        analysis_doc_f.write(str(analysis_doc))
        analysis_doc_f.close()

        # grab the disk log
        disk_cap_id = analysis_doc['output_files']['disk_capture']
        disk_cap_url = os.path.join(outdir_path, 'disk.dcap')

        logger.debug("Downloading disk capture log %s" % disk_cap_url)
        files_datastore.download_file(disk_cap_id, disk_cap_url)

        # grab memory snapshots
        clean_memory_dump_id = analysis_doc['output_files']['memory_dump_clean']
        dirty_memory_dump_id = analysis_doc['output_files']['memory_dump_dirty']

        clean_url = os.path.join(outdir_path, 'clean_mem')
        dirty_url = os.path.join(outdir_path, 'dirty_mem')

        logger.debug("Downloading clean memory dump to %s" % clean_url)
        files_datastore.download_file(clean_memory_dump_id, clean_url)

        logger.debug("Downloading dirty memory dump to %s" % dirty_url)
        files_datastore.download_file(dirty_memory_dump_id, dirty_url)

        screenshot1 = os.path.join(outdir_path, 'screenshot_interm')
        screenshot1_id = analysis_doc['output_files']['screenshot']
        screenshot2 = os.path.join(outdir_path, 'screenshot_final')
        screenshot2_id = analysis_doc['output_files']['screenshot_final']

        if int(args.machine_type) == 2:
            DB.download_file(screenshot1_id, screenshot1 + '.ppm')
            DB.download_file(screenshot2_id, screenshot2 + '.ppm')
        else:
            DB.download_file(screenshot1_id, screenshot1 + '.png')
            DB.download_file(screenshot2_id, screenshot2 + '.png')

        # unpack memory snapshots
        if tarfile.is_tarfile(clean_url):
            logger.debug("Unpacking %s" % clean_url)
            clean_path_out = os.path.join(outdir_path, "sut_memory_clean.mfd")
            clean_tar = tarfile.open(clean_url)
            clean_tar.extractall(outdir_path)
            clean_tar.close()

            # find stupid path
            p = os.path.join(outdir_path, 'lophi', 'tmp')
            p = os.path.join(p, os.listdir(p)[0])
            p = os.path.join(p, os.listdir(p)[0])

            logger.debug("Moving %s to %s" % (p, clean_path_out))
            shutil.move(p, clean_path_out)
            p = os.path.join(outdir_path, 'lophi')
            shutil.rmtree(p)

        if tarfile.is_tarfile(dirty_url):
            logger.debug("Unpacking %s" % dirty_url)
            dirty_path_out = os.path.join(outdir_path, "sut_memory_dirty.mfd")
            dirty_tar = tarfile.open(dirty_url)
            dirty_tar.extractall(outdir_path)
            dirty_tar.close()

            # find stupid path
            p = os.path.join(outdir_path, 'lophi', 'tmp')
            p = os.path.join(p, os.listdir(p)[0])
            p = os.path.join(p, os.listdir(p)[0])

            logger.debug("Moving %s to %s" % (p, dirty_path_out))
            shutil.move(p, dirty_path_out)
            p = os.path.join(outdir_path, 'lophi')
            shutil.rmtree(p)
Example #57
0
    if 'places' in filename:
        out_path = os.path.join(cur_dir, 'resources', filename)
    else:
        out_path = os.path.join(cur_dir, 'models', filename)

    size = 0

    with open(out_path, 'wb') as f:
        while True:
            chunk = response.read(blocksize)
            if not chunk:
                break
            f.write(chunk)
            size += len(chunk)

            if length:
                print('\tProgress: {:.0f}% ({:.2f} MB / {:.2f} MB)\r'.format(
                    100 * size / length, size / (1024 * 1024),
                    length / (1024 * 1024)),
                      end='')

        print()

        if 'models' in out_path:
            print('Extracting {}'.format(out_path))
            with tarfile.open(out_path) as tf:
                tf.extractall(path=os.path.dirname(out_path))

        print('DONE!')
Example #58
0
    def tar_files(self, aTarFileBytes, formatt):
        "When called will unpack and edit a Tar File and return a tar file"

        print "[*] TarFile size:", len(aTarFileBytes) / 1024, 'KB'

        if len(aTarFileBytes) > int(self.userConfig['TAR']['maxSize']):
            print "[!] TarFile over allowed size"
            logging.info("TarFIle maxSize met %s", len(aTarFileBytes))
            return aTarFileBytes

        with tempfile.NamedTemporaryFile() as tarFileStorage:
            tarFileStorage.write(aTarFileBytes)
            tarFileStorage.flush()

            if not tarfile.is_tarfile(tarFileStorage.name):
                print '[!] Not a tar file'
                return aTarFileBytes

            compressionMode = ':'
            if formatt == 'gz':
                compressionMode = ':gz'
            if formatt == 'bz':
                compressionMode = ':bz2'

            tarFile = None
            try:
                tarFileStorage.seek(0)
                tarFile = tarfile.open(fileobj=tarFileStorage,
                                       mode='r' + compressionMode)
            except tarfile.ReadError:
                pass

            if tarFile is None:
                print '[!] Not a tar file'
                return aTarFileBytes

            print '[*] Tar file contents and info:'
            print '[*] Compression:', formatt

            members = tarFile.getmembers()
            for info in members:
                print "\t", info.name, info.mtime, info.size

            newTarFileStorage = tempfile.NamedTemporaryFile()
            newTarFile = tarfile.open(mode='w' + compressionMode,
                                      fileobj=newTarFileStorage)

            patchCount = 0
            wasPatched = False

            for info in members:
                print "[*] >>> Next file in tarfile:", info.name

                if not info.isfile():
                    print info.name, 'is not a file'
                    newTarFile.addfile(info, tarFile.extractfile(info))
                    continue

                if info.size >= long(self.FileSizeMax):
                    print info.name, 'is too big'
                    newTarFile.addfile(info, tarFile.extractfile(info))
                    continue

                # Check against keywords
                keywordCheck = False

                if type(self.tarblacklist) is str:
                    if self.tarblacklist.lower() in info.name.lower():
                        keywordCheck = True

                else:
                    for keyword in self.tarblacklist:
                        if keyword.lower() in info.name.lower():
                            keywordCheck = True
                            continue

                if keywordCheck is True:
                    print "[!] Tar blacklist enforced!"
                    logging.info('Tar blacklist enforced on %s', info.name)
                    continue

                # Try to patch
                extractedFile = tarFile.extractfile(info)

                if patchCount >= int(self.userConfig['TAR']['patchCount']):
                    newTarFile.addfile(info, extractedFile)
                else:
                    # create the file on disk temporarily for fileGrinder to run on it
                    with tempfile.NamedTemporaryFile() as tmp:
                        shutil.copyfileobj(extractedFile, tmp)
                        tmp.flush()
                        patchResult = self.binaryGrinder(tmp.name)
                        if patchResult:
                            patchCount += 1
                            file2 = "backdoored/" + os.path.basename(tmp.name)
                            print "[*] Patching complete, adding to tar file."
                            info.size = os.stat(file2).st_size
                            with open(file2, 'rb') as f:
                                newTarFile.addfile(info, f)
                            logging.info(
                                "%s in tar patched, adding to tarfile",
                                info.name)
                            os.remove(file2)
                            wasPatched = True
                        else:
                            print "[!] Patching failed"
                            with open(tmp.name, 'rb') as f:
                                newTarFile.addfile(info, f)
                            logging.info(
                                "%s patching failed. Keeping original file in tar.",
                                info.name)
                if patchCount == int(self.userConfig['TAR']['patchCount']):
                    logging.info("Met Tar config patchCount limit.")

            # finalize the writing of the tar file first
            newTarFile.close()

            # then read the new tar file into memory
            newTarFileStorage.seek(0)
            ret = newTarFileStorage.read()
            newTarFileStorage.close()  # it's automatically deleted

            if wasPatched is False:
                # If nothing was changed return the original
                print "[*] No files were patched forwarding original file"
                return aTarFileBytes
            else:
                return ret
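The core pattern in the method above -- open a tar archive from raw bytes, copy members into a new in-memory archive, and return the rebuilt bytes -- can be sketched on its own. This is a simplified sketch, not the class's actual API; the patching logic is replaced by a straight member copy:

import io
import tarfile

def repack_tar_bytes(tar_bytes, compression=''):
    """Copy every member of an in-memory tar into a fresh in-memory tar.

    compression is '', 'gz' or 'bz2', mirroring the compressionMode handling above.
    """
    mode_suffix = ':' + compression          # '' -> ':', 'gz' -> ':gz', 'bz2' -> ':bz2'
    src = tarfile.open(fileobj=io.BytesIO(tar_bytes), mode='r' + mode_suffix)
    out_buffer = io.BytesIO()
    dst = tarfile.open(fileobj=out_buffer, mode='w' + mode_suffix)
    for info in src.getmembers():
        payload = src.extractfile(info) if info.isfile() else None
        dst.addfile(info, payload)           # directories, links etc. carry no payload
    dst.close()                              # finalize the archive before reading it back
    src.close()
    return out_buffer.getvalue()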
Example #59
0
def acquisitions_via_mtl(pathname):
    """
    Obtain a list of Acquisition objects from `pathname`.
    The argument `pathname` can be an MTL file or a directory name.
    If `pathname` is a directory then the MTL file will be searched
    for in the directory and its children.
    Returns an instance of `AcquisitionsContainer`.
    """

    if isfile(pathname) and tarfile.is_tarfile(pathname):
        with tarfile.open(pathname, 'r') as tarball:
            try:
                member = next(filter(lambda mm: 'MTL' in mm.name, tarball.getmembers()))
                with tarball.extractfile(member) as fmem:
                    data = load_mtl(fmem)
                prefix_name = 'tar://{}!'.format(os.path.abspath(pathname))
            except StopIteration:
                raise OSError("Cannot find MTL file in %s" % pathname)
    else:
        if isdir(pathname):
            filename = find_in(pathname, 'MTL')
        else:
            filename = pathname
        if filename is None:
            raise OSError("Cannot find MTL file in %s" % pathname)
        data = load_mtl(filename)
        prefix_name = os.path.dirname(os.path.abspath(filename))

    bandfiles = [k for k in data['PRODUCT_METADATA'].keys() if 'band' in k
                 and 'file_name' in k]
    bands_ = [b.replace('file_name', '').strip('_') for b in bandfiles]

    # create an acquisition object for each band and attach
    # some appropriate metadata/attributes

    # shortcuts to the required levels
    prod_md = data['PRODUCT_METADATA']
    rad_md = data['MIN_MAX_RADIANCE']
    quant_md = data['MIN_MAX_PIXEL_VALUE']

    # acquisition datetime
    acq_date = prod_md.get('acquisition_date', prod_md['date_acquired'])
    centre_time = prod_md.get('scene_center_scan_time',
                              prod_md['scene_center_time'])
    acq_datetime = datetime.datetime.combine(acq_date, centre_time)

    # platform and sensor id's
    platform_id = fixname(prod_md['spacecraft_id'])
    sensor_id = prod_md['sensor_id']
    if sensor_id == 'ETM':
        sensor_id = 'ETM+'

    # get the appropriate landsat acquisition class
    try:
        acqtype = ACQUISITION_TYPE['_'.join([platform_id, sensor_id])]
    except KeyError:
        acqtype = LandsatAcquisition

    # solar angles
    solar_azimuth = nested_lookup('sun_azimuth', data)[0]
    solar_elevation = nested_lookup('sun_elevation', data)[0]

    # granule id
    granule_id = nested_lookup('landsat_scene_id', data)[0]

    # bands to ignore
    ignore = ['band_quality']

    # supported bands for the given platform & sensor id's
    band_configurations = SENSORS[platform_id][sensor_id]['band_ids']

    acqs = []
    for band in bands_:
        if band in ignore:
            continue

        # band id
        if 'vcid' in band:
            band_id = band.replace('_vcid_', '').replace('band', '').strip('_')
        else:
            band_id = band.replace('band', '').strip('_')

        # band info stored in sensors.json
        sensor_band_info = band_configurations.get(band_id, {})

        # band id name, band filename, band full file pathname
        band_fname = prod_md.get('{}_file_name'.format(band),
                                 prod_md['file_name_{}'.format(band)])
        fname = pjoin(prefix_name, band_fname)

        min_rad = rad_md.get('lmin_{}'.format(band),
                             rad_md['radiance_minimum_{}'.format(band)])
        max_rad = rad_md.get('lmax_{}'.format(band),
                             rad_md['radiance_maximum_{}'.format(band)])

        min_quant = quant_md.get('qcalmin_{}'.format(band),
                                 quant_md['quantize_cal_min_{}'.format(band)])
        max_quant = quant_md.get('qcalmax_{}'.format(band),
                                 quant_md['quantize_cal_max_{}'.format(band)])

        # metadata
        attrs = {k: v for k, v in sensor_band_info.items()}
        if attrs.get('supported_band'):
            attrs['solar_azimuth'] = solar_azimuth
            attrs['solar_elevation'] = solar_elevation
            attrs['min_radiance'] = min_rad
            attrs['max_radiance'] = max_rad
            attrs['min_quantize'] = min_quant
            attrs['max_quantize'] = max_quant

        # band_name is an internal property of acquisitions class
        band_name = attrs.pop('band_name', band_id)

        acqs.append(acqtype(pathname, fname, acq_datetime, band_name, band_id,
                            attrs))

    # resolution groups dict
    res_groups = create_resolution_groups(acqs)

    return AcquisitionsContainer(label=basename(pathname),
                                 granules={granule_id: res_groups})
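The tarball branch above reduces to this member-lookup pattern, shown standalone as a sketch (the scene path is a placeholder, not a shipped test file):

import tarfile

# Placeholder path; any Landsat tarball containing an *_MTL.txt member would do.
with tarfile.open('landsat_scene.tar', 'r') as tarball:
    mtl_member = next(mm for mm in tarball.getmembers() if 'MTL' in mm.name)
    with tarball.extractfile(mtl_member) as fmem:
        mtl_text = fmem.read().decode('utf-8')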
Example #60
0
inception_pretrain_model_url = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz'

inception_pretrain_model_dir = "inception_model"
if not os.path.exists(inception_pretrain_model_dir):
    os.makedirs(inception_pretrain_model_dir)

filename = inception_pretrain_model_url.split('/')[-1]
filepath = os.path.join(inception_pretrain_model_dir, filename)

if not os.path.exists(filepath):
    print("downloading:", filename)
    r = requests.get(inception_pretrain_model_url, stream=True)
    with open(filepath, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
print(filename, "download finished")
with tarfile.open(filepath, 'r:gz') as tar:
    tar.extractall(inception_pretrain_model_dir)

log_dir = 'inception_log'
if not os.path.exists(log_dir):
    os.makedirs(log_dir)
inception_graph_def_file = os.path.join(inception_pretrain_model_dir, 'classify_image_graph_def.pb')
with tf.Session() as sess:
    with tf.gfile.FastGFile(inception_graph_def_file, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        tf.import_graph_def(graph_def, name='')
    writer = tf.summary.FileWriter(log_dir, sess.graph)
    writer.close()