Example #1
def zip_file(zip_main_file_path, zip_main_file_name, main_directory, definitions, _totals_):
    tmp_dir = main_directory + '/temp_unzips'
    zip_list = Ziplevels()
    zip_list.path = zip_main_file_path
    zip_list.name = zip_main_file_name
    zip_list.tmp_dir = os.path.join(tmp_dir, os.path.splitext(zip_list.name)[0])
    classes = [zip_list]
    try:
        for mylists in classes:
            if not os.path.exists(mylists.tmp_dir):
                os.makedirs(mylists.tmp_dir)
            z_file = tarfile.open(mylists.path) if tarfile.is_tarfile(mylists.path) else ZipFile(mylists.path)
            with z_file as zip_dir_path:
                zip_dir_path.extractall(path=mylists.tmp_dir)
                for (tmp_dir_name, tmp_sub_dir, tmp_file_name) in os.walk(mylists.tmp_dir, topdown=True):
                    for zip_file_name in tmp_file_name:
                        tmp_dir_path = os.path.join(tmp_dir_name, zip_file_name)
                        if zipfile.is_zipfile(tmp_dir_path) or tarfile.is_tarfile(tmp_dir_path):
                            temp_list = Ziplevels()
                            temp_list.path = tmp_dir_path
                            temp_list.name = zip_file_name
                            temp_list.tmp_dir = os.path.splitext(tmp_dir_path)[0]
                            classes.append(temp_list)
                        else:
                            file_check(tmp_dir_path, zip_file_name, main_directory, definitions, _totals_,
                                       zip_main_file_name)
        shutil.rmtree(tmp_dir)
    except Exception as e:
        log(zip_main_file_path + ' is not a valid archive', str(e))
Example #2
def check_volumes_result(target_folder):
    with (target_folder / "Manifest.yml").open("rt") as f:
        manifest = next(yaml.load_all(f))
    volumes = manifest["volumes"]
    assert len(volumes["project"]) == 1
    volume = volumes["project"]["care"]
    assert volume == "care.tar"
    archive = target_folder / "volumes" / "project" / volume
    assert archive.is_file()
    assert tarfile.is_tarfile(str(archive))
    assert count_dir_contents(target_folder / "volumes" / "project") == 1
    assert len(volumes["services"]) == 1
    assert len(volumes["services"]["foo"]) == 2
    volume = volumes["services"]["foo"]["/volume"]
    archive = target_folder / "volumes" / "services" / volume
    assert archive.is_file()
    assert tarfile.is_tarfile(str(archive))
    volume = volumes["services"]["foo"]["/image_volume1"]
    archive = target_folder / "volumes" / "services" / volume
    assert archive.is_file()
    assert tarfile.is_tarfile(str(archive))
    assert count_dir_contents(target_folder / "volumes" / "services") == 2
    assert len(volumes["mounted"]) == 3
    assert (target_folder / "volumes" / "mounted" / "asset.txt").is_file()
    assert (target_folder / "volumes" / "mounted" / "assets").is_dir()
    assert (target_folder / "volumes" / "mounted" / "assets" / "dummy").is_file()
    assert (target_folder / "volumes" / "mounted" / "local").is_dir()
    assert (target_folder / "volumes" / "mounted" / "local" / "dummy").is_file()
    assert count_dir_contents(target_folder / "volumes" / "mounted") == 3
Example #3
    def test_make_tarball(self):
        # creating something to tar
        root_dir, base_dir = self._create_files('')

        tmpdir2 = self.mkdtemp()
        # force shutil to create the directory
        os.rmdir(tmpdir2)
        # working with relative paths
        work_dir = os.path.dirname(tmpdir2)
        rel_base_name = os.path.join(os.path.basename(tmpdir2), 'archive')

        with support.change_cwd(work_dir):
            base_name = os.path.abspath(rel_base_name)
            tarball = make_archive(rel_base_name, 'gztar', root_dir, '.')

        # check if the compressed tarball was created
        self.assertEqual(tarball, base_name + '.tar.gz')
        self.assertTrue(os.path.isfile(tarball))
        self.assertTrue(tarfile.is_tarfile(tarball))
        with tarfile.open(tarball, 'r:gz') as tf:
            self.assertEqual(sorted(tf.getnames()),
                             ['.', './file1', './file2',
                              './sub', './sub/file3', './sub2'])

        # trying an uncompressed one
        with support.change_cwd(work_dir):
            tarball = make_archive(rel_base_name, 'tar', root_dir, '.')
        self.assertEqual(tarball, base_name + '.tar')
        self.assertTrue(os.path.isfile(tarball))
        self.assertTrue(tarfile.is_tarfile(tarball))
        with tarfile.open(tarball, 'r') as tf:
            self.assertEqual(sorted(tf.getnames()),
                             ['.', './file1', './file2',
                              './sub', './sub/file3', './sub2'])
Example #4
def parser_check():
	dirs,files = xbmcvfs.listdir(base_dir)
	if not dirs:
		dirpackages,filespackages = xbmcvfs.listdir(parser_packages_folder)
		if filespackages:
			for fich in filespackages:
				shutil.copyfile(os.path.join(parser_packages_folder,fich), os.path.join(parser_core_folder,fich))
				xbmc.sleep(100)
				import tarfile
				if tarfile.is_tarfile(os.path.join(parser_core_folder,fich)):
					download_tools().extract(os.path.join(parser_core_folder,fich),parser_core_folder)
					download_tools().remove(os.path.join(parser_core_folder,fich))
		else:
			dirsuserdata,files = xbmcvfs.listdir(parser_folder)
			for fich in files:
				dictionary_module = eval(readfile(os.path.join(parser_folder,fich)))
				if "url" in dictionary_module.keys():
					add_new_parser(dictionary_module["url"])
				else:
					xbmcvfs.copy(os.path.join(parser_packages_folder,fich.replace('.txt','.tar.gz')),os.path.join(parser_core_folder,fich.replace('.txt','.tar.gz')))
					import tarfile
					if tarfile.is_tarfile(os.path.join(parser_core_folder,fich.replace('.txt','.tar.gz'))):
						download_tools().extract(os.path.join(parser_core_folder,fich.replace('.txt','.tar.gz')),parser_core_folder)
						download_tools().remove(os.path.join(parser_core_folder,fich.replace('.txt','.tar.gz')))
	else: pass
	return
Example #5
 def runTest(self):
     """Unpack tarfile relative to srcdir"""
     try:
         if tarfile.is_tarfile(self.filepath):
             logger.debug("Tarfile acquired: %s" % self.filepath)
         else:
             self.fail("%s is not a tarfile." % self.filepath)
     except IOError, err:
         self.fail("%s is not a tarfile: %s" % (self.filepath, err))
Example #6
    def diff_ggl(self):
        args = self.args
        filename1, filename2, layout1, layout2 = args.f, args.F, args.l, args.L

        csvlayout1 = open(layout1, "r") if layout1 else None
        csvlayout2 = open(layout2, "r") if layout2 else None

        if tarfile.is_tarfile(filename1):
            paramconfig1, csvlayout1 = self.extract_from_ggl(filename1)
        else:
            paramconfig1 = open(filename1, "r")

        if tarfile.is_tarfile(filename2):
            paramconfig2, csvlayout2 = self.extract_from_ggl(filename2)
        else:
            paramconfig2 = open(filename2, "r")

        if paramconfig1 is not None and paramconfig2 is not None and csvlayout1 is not None and csvlayout2 is not None:
            params1 = self.read_paramconfig(paramconfig1, csvlayout1)
            params2 = self.read_paramconfig(paramconfig2, csvlayout2)
            self.compare(params1, params2)
            if type(paramconfig1) is file:
                paramconfig1.close()
            if type(paramconfig2) is file:
                paramconfig2.close()
            if type(csvlayout1) is file:
                csvlayout1.close()
            if type(csvlayout2) is file:
                csvlayout2.close()
        else:
            print args_parser.parse_args(["diff", "ggl", "-h"])

        return
Example #7
def is_tarfile(arg):
	"""Helper function to test if a given filepath/file-like-object is a
	tar-like file.

	Limitation: we use the name extension to determine this if the arg is a
	file-like-object. Valid extensions are '.tar', '.gz', '.bz', '.bz2'."""
	if isinstance(arg, str):
		# Process filepaths
		return tarfile.is_tarfile(arg)
	elif hasattr(arg, 'name'):
		# At the moment, we cannot check bytestreams for being tar files
		return os.path.splitext(arg.name)[-1] in ['.tar', '.gz', '.bz', '.bz2']
	return False
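A quick usage sketch of the helper above (the file names are hypothetical): a string path is handed to tarfile.is_tarfile, while a file-like object is judged only by its extension.

print(is_tarfile('example.tar'))        # True only if the file really is a tar archive
with open('example.tar.gz', 'rb') as fh:
    print(is_tarfile(fh))               # True, based solely on the '.gz' extension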
Example #8
def unpackArchive(archiveFile, targetBaseDir, subdir):
	"""Unpack archive into a directory"""

	if subdir and not subdir.endswith('/'):
		subdir += '/'
	# unpack source archive
	if tarfile.is_tarfile(archiveFile):
		tarFile = tarfile.open(archiveFile, 'r')
		members = None
		if subdir:
			members = [
				member for member in tarFile.getmembers()
				if member.name.startswith(subdir)
			]
			if not members:
				sysExit('sub-directory %s not found in archive' % subdir)
		tarFile.extractall(targetBaseDir, members)
		tarFile.close()
	elif zipfile.is_zipfile(archiveFile):
		zipFile = zipfile.ZipFile(archiveFile, 'r')
		names = None
		if subdir:
			names = [
				name for name in zipFile.namelist()
				if name.startswith(subdir)
			]
			if not names:
				sysExit('sub-directory %s not found in archive' % subdir)
		zipFile.extractall(targetBaseDir, names)
		zipFile.close()
	elif archiveFile.split('/')[-1].split('.')[-1] == 'xz':
		ensureCommandIsAvailable('xz')
		Popen(['xz', '-f', '-d', '-k', archiveFile]).wait()
		tar = archiveFile[:-3]
		if tarfile.is_tarfile(tar):
			tarFile = tarfile.open(tar, 'r')
			members = None
			if subdir:
				if not subdir.endswith('/'):
					subdir += '/'
				members = [
					member for member in tarFile.getmembers()
					if member.name.startswith(subdir)
				]
				if not members:
					sysExit('sub-directory %s not found in archive' % subdir)
			tarFile.extractall(targetBaseDir, members)
			tarFile.close()
	else:
		sysExit('Unrecognized archive type in file '
				+ archiveFile)
Example #9
    def _extract(self, filename):
        """ extractor helper
        """
        try:
            file_type = self._get_file_type(filename)
            opener = mode = None

            if file_type == 'zip':
                opener, mode = zipfile.ZipFile, 'r'

            elif file_type == 'gz':
                if tarfile.is_tarfile(filename):
                    opener, mode = tarfile.open, 'r:gz'

            elif file_type == 'bz2':
                if tarfile.is_tarfile(filename):
                    opener, mode = tarfile.open, 'r:bz2'

            if not opener:
                raise Exception("Unsupported file compression")

            cfile = opener(filename, mode)

            # if first member is dir, skip 1st container path
            if file_type == 'zip':
                members = cfile.namelist()
            else:
                members = cfile.getmembers()

            stdout = ''
            for member in members:
                if file_type == 'zip':
                    member_name = member
                else:
                    member_name = member.name

                stdout += "Extracted " + member_name + "\n"
            cfile.extractall(self.working_dir)
            cfile.close()

        except Exception as e:
            try:
                return self._extract_alternative(filename)
            except:
                raise Exception("Could not extract file: %s" % e)

        ret = {'out': 0, 'stderr': '', 'stdout': stdout}
        return ret
Example #10
def test_create_tarfile(monkeypatch, settings, format, extension):
    settings.BACKUPED_ROOT = BACKUPED_ROOT
    try:
        os.makedirs(BACKUPED_ROOT)
    except OSError:
        pass
    filename = 'edoardo-0.0.0-201501231405' + extension
    filepath = os.path.join(BACKUPS_ROOT, filename)
    try:
        # Make sure it doesn't exist before running backup.
        os.remove(filepath)
    except OSError:
        pass
    monkeypatch.setattr('ideascube.serveradmin.backup.Backup.ROOT',
                        BACKUPS_ROOT)
    monkeypatch.setattr('ideascube.serveradmin.backup.Backup.FORMAT', format)
    monkeypatch.setattr('ideascube.serveradmin.backup.make_name',
                        lambda f: filename)
    proof_file = os.path.join(settings.BACKUPED_ROOT, 'backup.me')
    open(proof_file, mode='w')
    Backup.create()
    assert os.path.exists(filepath)
    assert tarfile.is_tarfile(filepath)
    archive = tarfile.open(filepath)
    assert './backup.me' in archive.getnames()
    archive.close()
    os.remove(filepath)
    os.remove(proof_file)
Example #11
def _get_archive_filelist(filename):
    # type: (str) -> List[str]
    """Extract the list of files from a tar or zip archive.

    Args:
        filename: name of the archive

    Returns:
        Sorted list of files in the archive, excluding './'

    Raises:
        ValueError: when the file is neither a zip nor a tar archive
        FileNotFoundError: when the provided file does not exist (for Python 3)
        IOError: when the provided file does not exist (for Python 2)
    """
    names = []  # type: List[str]
    if tarfile.is_tarfile(filename):
        with tarfile.open(filename) as tar_file:
            names = sorted(tar_file.getnames())
    elif zipfile.is_zipfile(filename):
        with zipfile.ZipFile(filename) as zip_file:
            names = sorted(zip_file.namelist())
    else:
        raise ValueError("Can not get filenames from '{!s}'. "
                         "Not a tar or zip file".format(filename))
    if "./" in names:
        names.remove("./")
    return names
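A minimal usage sketch of _get_archive_filelist (the archive name is hypothetical):

try:
    for name in _get_archive_filelist('bundle.tar.gz'):
        print(name)
except ValueError as err:
    # Raised when the file is neither a tar nor a zip archive
    print(err)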
Example #12
 def setup_buffer(self):
     if self.exists == False:
         return None
     if self.subfile == None:
         # assume a regular file or gzipped
         filename, file_extension = os.path.splitext(self.fname)
         if file_extension == '.gzip' or file_extension == '.gz':
             import gzip
             try:
                 self.buffer = gzip.open(self.fname, 'r')
                 self.is_gzip = True
             except:
                 pass
                 #print >> sys.stderr,'[e] bad gzip file?',self.fname
         else:
             self.buffer = self.fname
     else:
         import tarfile
         if tarfile.is_tarfile(self.fname):
             self.tarfile = tarfile.open(self.fname, "r:gz")
             try:
                 tarinfo = self.tarfile.getmember(self.subfile)
             except:
                 print >> sys.stderr,'[e] file in archive not found:',self.subfile
                 tarinfo = None
                 self.buffer = None
             if tarinfo != None:
                 if tarinfo.isreg():
                     self.buffer = self.tarfile.extractfile(tarinfo)
                 else:
                     self.buffer = None
         else:
             self.buffer = None
Example #13
def is_archive(filename):
    """
    test if file is a valid archive (zip, tar or rar)
    """
    return tarfile.is_tarfile(filename) or \
           zipfile.is_zipfile(filename) or \
           (ARCHIVE_RAR_AVAILABLE and rarfile.is_rarfile(filename))
Example #14
 def _assert_tar_count_equals(self, file_name, count):
     if not tarfile.is_tarfile(file_name):
         # Maybe it's not a tar because it's a status message.
         fin = open(file_name, 'r')
         contents = fin.read(256)
         fin.close()
         if contents.lower().find("no ") != -1:
             self.assertEqual( 0, count)
             return
         raise Exception("%s is not a tar file" % file_name)
     tar = tarfile.open(file_name)
     tmp_dir = "unit_test_tmp"
     if os.path.exists(tmp_dir):
         filenames = os.listdir(tmp_dir)
         for file in filenames:
             os.remove(os.path.join(tmp_dir, file))
         os.rmdir(tmp_dir)            
     os.mkdir(tmp_dir)
     tar.extractall(path=tmp_dir)
     tar.close()
     filenames = os.listdir(tmp_dir)
     try:
         self.assertEqual( len(filenames), count)
     finally:
         # clean up
         for file in filenames:
             os.remove(os.path.join(tmp_dir, file))
         os.rmdir(tmp_dir)
Example #15
def unarchive_file(archive_fpath, force_commonprefix=True):
    print('Unarchive: %r' % archive_fpath)
    if tarfile.is_tarfile(archive_fpath):
        return untar_file(archive_fpath, force_commonprefix=force_commonprefix)
    elif zipfile.is_zipfile(archive_fpath):
        return unzip_file(archive_fpath, force_commonprefix=force_commonprefix)
    elif archive_fpath.endswith('.gz') and not archive_fpath.endswith('.tar.gz'):
        """
        from utool.util_grabdata import *
        archive_fpath = '/home/joncrall/.config/utool/train-images-idx3-ubyte.gz'
        """
        # FIXME: unsure if this is general
        output_fpath = splitext(archive_fpath)[0]
        with gzip.open(archive_fpath, 'rb') as gzfile_:
            contents = gzfile_.read()
            with open(output_fpath, 'wb') as file_:
                file_.write(contents)
        return output_fpath
    #elif archive_fpath.endswith('.gz'):
    #    # This is to handle .gz files (not .tar.gz) like how MNIST is stored
    #    # Example: http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
    #    return ungz_file(archive_fpath)
    else:
        if archive_fpath.endswith('.zip') or archive_fpath.endswith('.tar.gz'):
            raise AssertionError('archive is corrupted: %r' % (archive_fpath,))
        raise AssertionError('unknown archive format: %r' % (archive_fpath,))
Example #16
File: util.py Project: bchess/mrjob
def unarchive(archive_path, dest):
    """Extract the contents of a tar or zip file at *archive_path* into the
    directory *dest*.

    :type archive_path: str
    :param archive_path: path to archive file
    :type dest: str
    :param dest: path to directory where archive will be extracted

    *dest* will be created if it doesn't already exist.

    tar files can be gzip compressed, bzip2 compressed, or uncompressed. Files
    within zip files can be deflated or stored.
    """
    if tarfile.is_tarfile(archive_path):
        with contextlib.closing(tarfile.open(archive_path, 'r')) as archive:
            archive.extractall(dest)
    elif zipfile.is_zipfile(archive_path):
        with contextlib.closing(zipfile.ZipFile(archive_path, 'r')) as archive:
            for name in archive.namelist():
                # the zip spec specifies that front slashes are always
                # used as directory separators
                dest_path = os.path.join(dest, *name.split('/'))

                # now, split out any dirname and filename and create
                # one and/or the other
                dirname, filename = os.path.split(dest_path)
                if dirname and not os.path.exists(dirname):
                    os.makedirs(dirname)
                if filename:
                    with open(dest_path, 'wb') as dest_file:
                        dest_file.write(archive.read(name))
    else:
        raise IOError('Unknown archive type: %s' % (archive_path,))
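A minimal usage sketch of unarchive (the paths are hypothetical):

unarchive('job-files.tar.gz', '/tmp/job-files')   # dest is created if it doesn't already exist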
Example #17
def archive_open(name):
    if tarfile.is_tarfile(name):
        return tarfile.open(name)
    elif zipfile.is_zipfile(name):
        return zipfile.ZipFile(name)
    else:
        return None
Example #18
def archive_get_type(name):
    if tarfile.is_tarfile(name):
        return 'tar'
    elif zipfile.is_zipfile(name):
        return 'zip'
    else:
        return None
Example #19
	def read_packages(self):
		print("Reading {0}...".format(self._cache_db), end="", flush=True)

		if not os.path.exists(self._cache_db):
			print(" not found!")
			return False
		if not tarfile.is_tarfile(self._cache_db):
			print(" not a tar!")
			return False

		tar = tarfile.open(self._cache_db)
		pkg_info = {}
		self._pkgs = {}

		for info in tar.getmembers():
			if not info.isfile():
				continue

			( binpkg_name, file_name ) = info.name.split("/")

			pi = pkg_info.get(binpkg_name, {})
			with tar.extractfile(info) as file:
				pi[file_name] = [x.decode("utf-8").rstrip() for x in file.readlines()]

			if len(pi.keys() & { "desc", "depends" }) == 2:
				BinaryRepo.BinPkg(self, pi["desc"], pi["depends"])
				del pkg_info[binpkg_name]
				continue

			pkg_info[binpkg_name] = pi

		if len(pkg_info) != 0:
			raise Exception("Incomplete packages in DB")

		print(" done")
Example #20
def decompress(filename, out_dir='/tmp/decompressed'):
    """
    Given a tar.gz or a zip, extract the contents and return a list of files.
    If the out_dir already exists, we skip decompression and just return the
    files inside that dir. Otherwise it will be created from scratch and
    filled with the files from the compressed file.
    """
    if os.path.exists(out_dir):
        return glob.glob(os.path.join(out_dir, '*'))
    os.makedirs(out_dir)
    del_dir = False

    fn = filename #alias
    try:
        if zipfile.is_zipfile(fn):
            zipfile.ZipFile(fn, 'r').extractall(out_dir)
        elif tarfile.is_tarfile(fn):
            tarfile.open(fn, 'r').extractall(out_dir)
        else:
            raise ValueError('Invalid file type - must be tar.gz or zip')
    except Exception as e:
        del_dir = True #delete the partially created out_dir
        raise e #pass exception through
    finally:
        if del_dir:
            shutil.rmtree(out_dir)
    
    return [os.path.join(out_dir, f) for f in os.listdir(out_dir)]
Example #21
def extract(path, extdir=None, delete=False):
    """
    Takes in a tar or zip file and extracts it to extdir
    If extdir is not specified, extracts to path
    If delete is set to True, deletes the bundle at path
    Returns the list of top level files that were extracted
    """
    if zipfile.is_zipfile(path):
        bundle = zipfile.ZipFile(path)
        namelist = bundle.namelist()
    elif tarfile.is_tarfile(path):
        bundle = tarfile.open(path)
        namelist = bundle.getnames()
    else:
        return
    if extdir is None:
        extdir = os.path.dirname(path)
    elif not os.path.exists(extdir):
        os.makedirs(extdir)
    bundle.extractall(path=extdir)
    bundle.close()
    if delete:
        os.remove(path)
    return [os.path.join(extdir, name) for name in namelist
                if len(name.rstrip(os.sep).split(os.sep)) == 1]
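A one-line usage sketch of extract (the bundle path and target directory are hypothetical):

top_level_items = extract('bundle.tar.gz', extdir='/tmp/bundle')   # list of top-level extracted paths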
Example #22
def repackage_dmg(infile, output):

    if not tarfile.is_tarfile(infile):
        raise Exception("Input file %s is not a valid tarfile." % infile)

    tmpdir = tempfile.mkdtemp()
    try:
        with tarfile.open(infile) as tar:
            tar.extractall(path=tmpdir)

        # Remove the /Applications symlink. If we don't, an rsync command in
        # create_dmg() will break, and create_dmg() re-creates the symlink anyway.
        try:
            os.remove(mozpath.join(tmpdir, ' '))
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise

        volume_name = get_application_ini_value(tmpdir, 'App', 'CodeName')

        # The extra_files argument is empty [] because they are already a part
        # of the original dmg produced by the build, and they remain in the
        # tarball generated by the signing task.
        create_dmg(tmpdir, output, volume_name, [])

    finally:
        shutil.rmtree(tmpdir)
Example #23
def create_install_repo_from_tgz_node(host_string, *tgzs, **kwargs):
    """Create contrail repos from each tgz files in the given node
       * tgzs can be absolute/relative paths or a pattern
    """
    # verify tgz's availability
    cant_use = []
    usable_tgz_files = []
    for tgz in tgzs:
        tgz_files = os.path.abspath(os.path.expanduser(tgz))
        tgz_file_list = glob.glob(tgz_files)
        for tgz_file in tgz_file_list:
            if not os.access(tgz_file, os.R_OK):
                cant_use.append(tgz_file)
            elif not tarfile.is_tarfile(tgz_file):
                cant_use.append(tgz_file)
            else:
                usable_tgz_files.append(tgz_file)

    if len(cant_use) != 0:
        print "ERROR: TGZ file mentioned below are not readable or", \
              "not a valid tgz file or do not exists"
        print "\n".join(cant_use)

    for tgz in usable_tgz_files:
        with settings(host_string=host_string, warn_only=True):
            os_type = detect_ostype()
        if os_type in ['centos', 'fedora', 'redhat', 'centoslinux']:
            execute(create_yum_repo_from_tgz_node, tgz, host_string, **kwargs)
        elif os_type in ['ubuntu']:
            execute(create_apt_repo_from_tgz_node, tgz, host_string, **kwargs)
Example #24
def unpack_file(filename, location, content_type, link):
    filename = os.path.realpath(filename)
    if (content_type == 'application/zip' or
            filename.lower().endswith(ZIP_EXTENSIONS) or
            zipfile.is_zipfile(filename)):
        unzip_file(
            filename,
            location,
            flatten=not filename.endswith('.whl')
        )
    elif (content_type == 'application/x-gzip' or
            tarfile.is_tarfile(filename) or
            filename.lower().endswith(
                TAR_EXTENSIONS + BZ2_EXTENSIONS + XZ_EXTENSIONS)):
        untar_file(filename, location)
    elif (content_type and content_type.startswith('text/html') and
            is_svn_page(file_contents(filename))):
        # We don't really care about this
        from pip.vcs.subversion import Subversion
        Subversion('svn+' + link.url).unpack(location)
    else:
        # FIXME: handle?
        # FIXME: magic signatures?
        logger.critical(
            'Cannot unpack file %s (downloaded from %s, content-type: %s); '
            'cannot detect archive format',
            filename, location, content_type,
        )
        raise InstallationError(
            'Cannot determine archive format of %s' % location
        )
Example #25
def install(src, dest):
    """Install a zip, exe, tar.gz, tar.bz2 or dmg file, and return the path of
    the installation folder.

    :param src: Path to the install file
    :param dest: Path to install to (to ensure we do not overwrite any existent
                 files the folder should not exist yet)
    """
    src = os.path.realpath(src)
    dest = os.path.realpath(dest)

    if not is_installer(src):
        raise InvalidSource(src + ' is not a valid installer file.')

    if not os.path.exists(dest):
        os.makedirs(dest)

    trbk = None
    try:
        install_dir = None
        if zipfile.is_zipfile(src) or tarfile.is_tarfile(src):
            install_dir = mozfile.extract(src, dest)[0]
        elif src.lower().endswith('.dmg'):
            install_dir = _install_dmg(src, dest)
        elif src.lower().endswith('.exe'):
            install_dir = _install_exe(src, dest)

        return install_dir

    except Exception, ex:
        cls, exc, trbk = sys.exc_info()
        error = InstallError('Failed to install "%s (%s)"' % (src, str(ex)))
        raise InstallError, error, trbk
Example #26
def unzip_archive(archive):
    """
    Extracts an archive into a temporary directory
    Returns the path to that directory

    Arguments:
    archive -- the path to an archive file
    """
    tmpdir = os.path.join(tempfile.gettempdir(),
            os.path.basename(archive))
    assert tmpdir != archive # That wouldn't work out

    if os.path.exists(tmpdir):
        # files are already extracted
        pass
    else:
        if tarfile.is_tarfile(archive):
            print 'Extracting tarfile ...'
            with tarfile.open(archive) as tf:
                tf.extractall(path=tmpdir)
        elif zipfile.is_zipfile(archive):
            print 'Extracting zipfile ...'
            with zipfile.ZipFile(archive) as zf:
                zf.extractall(path=tmpdir)
        else:
            raise ValueError('Unknown file type for %s' % os.path.basename(archive))
    return tmpdir
Example #27
    def handle_rules_in_tar(self, f):
        if (not tarfile.is_tarfile(f.name)):
            raise OSError("Invalid tar file")

        self.updated_date = timezone.now()
        self.first_run = False

        repo = self.get_git_repo(delete = True)

        f.seek(0)
        # extract file
        tfile = tarfile.open(fileobj=f)
        # FIXME This test is only for rules archive
        for member in tfile.getmembers():
            if not member.name.startswith("rules"):
                raise SuspiciousOperation("Suspect tar file contains an invalid name '%s'" % (member.name))

        source_git_dir = os.path.join(settings.GIT_SOURCES_BASE_DIRECTORY, str(self.pk))
        tfile.extractall(path=source_git_dir)
        index = repo.index
        if len(index.diff(None)) or self.first_run:
            os.environ['USERNAME'] = '******'
            index.add(["rules"])
            message =  'source version at %s' % (self.updated_date)
            index.commit(message)

        self.save()
        # Now we must update SourceAtVersion for this source
        # or create it if needed
        self.create_sourceatversion()
        # Get categories
        self.get_categories(tfile)
Example #28
    def can_read(cls, filename):
        """
        Given an archive filename, returns True if this class can read and
        process the archive format of that file.
        """

        return tarfile.is_tarfile(filename)
Example #29
def extract_and_process(file_name, callback, *additional_args):
    """ Extracts a tar file and runs 'callback' on all files inside 
    
    file_name: tar file to extract
    callback: function to run on all files
    additional_args: optional, additional arguments for callback
    """
    folder_name = os.path.basename(file_name).split('.')[0]
    if os.path.exists(folder_name):
        print "This script will delete the folder" + \
              "'%s' and all its contents" % folder_name
        are_you_sure()
        process_folder(folder_name, os.remove)
        os.rmdir(folder_name)
    os.mkdir(folder_name)
    try:
        print "Extracting %s to %s" % (file_name, folder_name)
        if not tarfile.is_tarfile(file_name): raise Exception("Not a tarfile")
        tar = tarfile.open(file_name)
        tar.extractall(folder_name)
        tar.close()
        print "Extraction successful."

        process_folder(folder_name, callback, *additional_args)
    finally:
        process_folder(folder_name, os.remove)
        os.rmdir(folder_name)
Example #30
 def is_tar(path, file):
     try:
         if tarfile.is_tarfile(os.path.join(path, file)):
             return True
     except OSError, e:
         logging.error("Error in is_tar for '%s': %s" % (file, e))
         raise OSError
Example #31
def do_get_s3_archive(profile, region, bucket, table, archive):
    """
    Fetch latest file named filename from S3

    Bucket must exist prior to running this function.
    filename is args.dumpPath.  File would be "args.dumpPath" with suffix .tar.bz2 or .zip
    """

    s3 = _get_aws_client(profile, region, "s3")

    if archive:
        if archive == "tar":
            archive_type = "tar.bz2"
        else:
            archive_type = "zip"

    # Make sure bucket exists before continuing
    try:
        s3.head_bucket(
            Bucket=bucket
        )
    except botocore.exceptions.ClientError as e:
        logging.exception("S3 bucket " + bucket + " does not exist. "
                          "Can't get backup file\n\n" + str(e))
        sys.exit(1)

    try:
        contents = s3.list_objects_v2(
            Bucket=bucket,
            Prefix=args.dumpPath
        )
    except botocore.exceptions.ClientError as e:
        logging.exception("Issue listing contents of bucket " + bucket + "\n\n" + str(e))
        sys.exit(1)

    # Script will always overwrite older backup.  Bucket versioning stores multiple backups.
    # Therefore, just get item from bucket based on table name since that's what we name the files.
    filename = None
    for d in contents["Contents"]:
        if d["Key"] == "{}/{}.{}".format(args.dumpPath, table, archive_type):
            filename = d["Key"]

    if not filename:
        logging.exception("Unable to find file to restore from.  "
                          "Confirm the name of the table you're restoring.")
        sys.exit(1)

    output_file = "/tmp/" + os.path.basename(filename)
    logging.info("Downloading file " + filename + " to " + output_file)
    s3.download_file(bucket, filename, output_file)

    # Extract archive based on suffix
    if tarfile.is_tarfile(output_file):
        try:
            logging.info("Extracting tar file...")
            with tarfile.open(name=output_file, mode="r:bz2") as a:
                a.extractall(path=".")
        except tarfile.ReadError as e:
            logging.exception("Error reading downloaded archive\n\n" + str(e))
            sys.exit(1)
        except tarfile.ExtractError as e:
            # ExtractError is raised for non-fatal errors on extract method
            logging.error("Error during extraction: " + str(e))

    # Assuming zip file here since we're only supporting tar and zip at this time
    else:
        try:
            logging.info("Extracting zip file...")
            with zipfile.ZipFile(output_file, "r") as z:
                z.extractall(path=".")
        except zipfile.BadZipFile as e:
            logging.exception("Problem extracting zip file\n\n" + str(e))
            sys.exit(1)
Example #32
def uncompress_file(func, filename, *args, **kwargs):
    """
    Decorator used for temporary uncompressing file if .gz or .bz2 archive.
    """
    if not kwargs.get('check_compression', True):
        return func(filename, *args, **kwargs)
    if not isinstance(filename, str):
        return func(filename, *args, **kwargs)
    elif not Path(filename).exists():
        msg = "File not found '%s'" % (filename)
        raise IOError(msg)
    # check if we got a compressed file or archive
    obj_list = []
    if tarfile.is_tarfile(filename):
        try:
            # reading with transparent compression
            with tarfile.open(filename, 'r|*') as tar:
                for tarinfo in tar:
                    # only handle regular files
                    if not tarinfo.isfile():
                        continue
                    data = tar.extractfile(tarinfo).read()
                    # Skip empty files - we don't need them no matter what
                    # and it guards against rare cases where waveforms files
                    # are also slightly valid tar-files.
                    if not data:
                        continue
                    obj_list.append(data)
        except Exception:
            pass
    elif zipfile.is_zipfile(filename):
        try:
            zip = zipfile.ZipFile(filename)
            obj_list = [zip.read(name) for name in zip.namelist()]
        except Exception:
            pass
    elif filename.endswith('.bz2'):
        # bz2 module
        try:
            import bz2
            with open(filename, 'rb') as fp:
                obj_list.append(bz2.decompress(fp.read()))
        except Exception:
            pass
    elif filename.endswith('.gz'):
        # gzip module
        try:
            import gzip
            with gzip.open(filename, 'rb') as fp:
                obj_list.append(fp.read())
        except Exception:
            pass
    # handle results
    if obj_list:
        # write results to temporary files
        result = None
        for obj in obj_list:
            with NamedTemporaryFile() as tempfile:
                tempfile._fileobj.write(obj)
                stream = func(tempfile.name, *args, **kwargs)
                # just add other stream objects to first stream
                if result is None:
                    result = stream
                else:
                    result += stream
    else:
        # no compressions
        result = func(filename, *args, **kwargs)
    return result
Example #33
def DownloadURL(url, context, force, dontExtract=None):
    """Download and extract the archive file at given URL to the
    source directory specified in the context.

    dontExtract may be a sequence of path prefixes that will
    be excluded when extracting the archive.

    Returns the absolute path to the directory where files have
    been extracted."""
    with CurrentWorkingDirectory(context.srcDir):
        # Extract filename from URL and see if file already exists.
        filename = url.split("/")[-1]
        if force and os.path.exists(filename):
            os.remove(filename)

        if os.path.exists(filename):
            PrintInfo("{0} already exists, skipping download".format(
                os.path.abspath(filename)))
        else:
            PrintInfo("Downloading {0} to {1}".format(
                url, os.path.abspath(filename)))

            # To work around occasional hiccups with downloading from websites
            # (SSL validation errors, etc.), retry a few times if we don't
            # succeed in downloading the file.
            maxRetries = 5
            lastError = None

            # Download to a temporary file and rename it to the expected
            # filename when complete. This ensures that incomplete downloads
            # will be retried if the script is run again.
            tmpFilename = filename + ".tmp"
            if os.path.exists(tmpFilename):
                os.remove(tmpFilename)

            for i in xrange(maxRetries):
                try:
                    context.downloader(url, tmpFilename)
                    break
                except Exception as e:
                    PrintCommandOutput(
                        "Retrying download due to error: {err}\n".format(
                            err=e))
                    lastError = e
            else:
                errorMsg = str(lastError)
                if "SSL: TLSV1_ALERT_PROTOCOL_VERSION" in errorMsg:
                    errorMsg += (
                        "\n\n"
                        "Your OS or version of Python may not support "
                        "TLS v1.2+, which is required for downloading "
                        "files from certain websites. This support "
                        "was added in Python 2.7.9."
                        "\n\n"
                        "You can use curl to download dependencies "
                        "by installing it in your PATH and re-running "
                        "this script.")
                raise RuntimeError("Failed to download {url}: {err}".format(
                    url=url, err=errorMsg))

            shutil.move(tmpFilename, filename)

        # Open the archive and retrieve the name of the top-most directory.
        # This assumes the archive contains a single directory with all
        # of the contents beneath it.
        archive = None
        rootDir = None
        members = None
        try:
            if tarfile.is_tarfile(filename):
                archive = tarfile.open(filename)
                rootDir = archive.getnames()[0].split('/')[0]
                if dontExtract != None:
                    members = (m for m in archive.getmembers()
                               if not any((fnmatch.fnmatch(m.name, p)
                                           for p in dontExtract)))
            elif zipfile.is_zipfile(filename):
                archive = zipfile.ZipFile(filename)
                rootDir = archive.namelist()[0].split('/')[0]
                if dontExtract != None:
                    members = (m for m in archive.getnames()
                               if not any((fnmatch.fnmatch(m, p)
                                           for p in dontExtract)))
            else:
                raise RuntimeError("unrecognized archive file type")

            with archive:
                extractedPath = os.path.abspath(rootDir)
                if force and os.path.isdir(extractedPath):
                    shutil.rmtree(extractedPath)

                if os.path.isdir(extractedPath):
                    PrintInfo("Directory {0} already exists, skipping extract".
                              format(extractedPath))
                else:
                    PrintInfo(
                        "Extracting archive to {0}".format(extractedPath))

                    # Extract to a temporary directory then move the contents
                    # to the expected location when complete. This ensures that
                    # incomplete extracts will be retried if the script is run
                    # again.
                    tmpExtractedPath = os.path.abspath("extract_dir")
                    if os.path.isdir(tmpExtractedPath):
                        shutil.rmtree(tmpExtractedPath)

                    archive.extractall(tmpExtractedPath, members=members)

                    shutil.move(os.path.join(tmpExtractedPath, rootDir),
                                extractedPath)
                    shutil.rmtree(tmpExtractedPath)

                return extractedPath
        except Exception as e:
            # If extraction failed for whatever reason, assume the
            # archive file was bad and move it aside so that re-running
            # the script will try downloading and extracting again.
            shutil.move(filename, filename + ".bad")
            raise RuntimeError(
                "Failed to extract archive {filename}: {err}".format(
                    filename=filename, err=e))
Example #34
import tarfile

for f_name in [
        'hello.py', 'work.tar.gz', 'welcome.py', 'nofile.tar', 'sample.tar.xz'
]:
    try:
        print('{:} {}'.format(f_name, tarfile.is_tarfile(f_name)))
    except IOError as err:
        print('{:} {}'.format(f_name, err))
Example #35
def cached_path(
    url_or_filename: Union[str, PathLike],
    cache_dir: Union[str, Path] = None,
    extract_archive: bool = False,
    force_extract: bool = False,
) -> str:
    """
    Given something that might be a URL (or might be a local path),
    determine which. If it's a URL, download the file and cache it, and
    return the path to the cached file. If it's already a local path,
    make sure the file exists and then return the path.

    # Parameters

    url_or_filename : `Union[str, Path]`
        A URL or local file to parse and possibly download.

    cache_dir : `Union[str, Path]`, optional (default = `None`)
        The directory to cache downloads.

    extract_archive : `bool`, optional (default = `False`)
        If `True`, then zip or tar.gz archives will be automatically extracted.
        In which case the directory is returned.

    force_extract : `bool`, optional (default = `False`)
        If `True` and the file is an archive file, it will be extracted regardless
        of whether or not the extracted directory already exists.
    """
    if cache_dir is None:
        cache_dir = CACHE_DIRECTORY

    cache_dir = os.path.expanduser(cache_dir)
    os.makedirs(cache_dir, exist_ok=True)

    if isinstance(url_or_filename, PathLike):
        url_or_filename = str(url_or_filename)

    file_path: str

    # If we're using the /a/b/foo.zip!c/d/file.txt syntax, handle it here.
    exclamation_index = url_or_filename.find("!")
    if extract_archive and exclamation_index >= 0:
        archive_path = url_or_filename[:exclamation_index]
        file_name = url_or_filename[exclamation_index + 1:]

        # Call 'cached_path' recursively now to get the local path to the archive itself.
        cached_archive_path = cached_path(archive_path, cache_dir, True,
                                          force_extract)
        if not os.path.isdir(cached_archive_path):
            raise ValueError(
                f"{url_or_filename} uses the ! syntax, but does not specify an archive file."
            )

        # Now return the full path to the desired file within the extracted archive,
        # provided it exists.
        file_path = os.path.join(cached_archive_path, file_name)
        if not os.path.exists(file_path):
            raise FileNotFoundError(
                f"file {file_name} not found within {archive_path}")

        return file_path

    parsed = urlparse(url_or_filename)

    extraction_path: Optional[str] = None

    if parsed.scheme in ("http", "https", "s3"):
        # URL, so get it from the cache (downloading if necessary)
        file_path = get_from_cache(url_or_filename, cache_dir)

        if extract_archive and (is_zipfile(file_path)
                                or tarfile.is_tarfile(file_path)):
            # This is the path the file should be extracted to.
            # For example ~/.allennlp/cache/234234.21341 -> ~/.allennlp/cache/234234.21341-extracted
            extraction_path = file_path + "-extracted"

    else:
        url_or_filename = os.path.expanduser(url_or_filename)

        if os.path.exists(url_or_filename):
            # File, and it exists.
            file_path = url_or_filename
            # Normalize the path.
            url_or_filename = os.path.abspath(url_or_filename)

            if extract_archive and (is_zipfile(file_path)
                                    or tarfile.is_tarfile(file_path)):
                # We'll use a unique directory within the cache root to extract the archive to.
                # The name of the directory is a hash of the resource file path and its modification
                # time. That way, if the file changes, we'll know when to extract it again.
                extraction_name = (_resource_to_filename(
                    url_or_filename, str(os.path.getmtime(file_path))) +
                                   "-extracted")
                extraction_path = os.path.join(cache_dir, extraction_name)

        elif parsed.scheme == "":
            # File, but it doesn't exist.
            raise FileNotFoundError(f"file {url_or_filename} not found")

        else:
            # Something unknown
            raise ValueError(
                f"unable to parse {url_or_filename} as a URL or as a local path"
            )

    if extraction_path is not None:
        # If the extracted directory already exists (and is non-empty), then no
        # need to extract again unless `force_extract=True`.
        if os.path.isdir(extraction_path) and os.listdir(
                extraction_path) and not force_extract:
            return extraction_path

        # Extract it.
        with FileLock(extraction_path + ".lock"):
            logger.info("Extracting %s to %s", url_or_filename,
                        extraction_path)
            shutil.rmtree(extraction_path, ignore_errors=True)

            # We extract first to a temporary directory in case something goes wrong
            # during the extraction process so we don't end up with a corrupted cache.
            tmp_extraction_dir = tempfile.mkdtemp(
                dir=os.path.split(extraction_path)[0])
            try:
                if is_zipfile(file_path):
                    with ZipFile(file_path, "r") as zip_file:
                        zip_file.extractall(tmp_extraction_dir)
                        zip_file.close()
                else:
                    tar_file = tarfile.open(file_path)
                    tar_file.extractall(tmp_extraction_dir)
                    tar_file.close()
                # Extraction was successful, rename temp directory to final
                # cache directory and dump the meta data.
                os.replace(tmp_extraction_dir, extraction_path)
                meta = _Meta(
                    resource=url_or_filename,
                    cached_path=extraction_path,
                    creation_time=time.time(),
                    extraction_dir=True,
                    size=_get_resource_size(extraction_path),
                )
                meta.to_file()
            finally:
                shutil.rmtree(tmp_extraction_dir, ignore_errors=True)

        return extraction_path

    return file_path
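A short usage sketch of this cached_path variant (the archive path and member name are hypothetical):

# Extract a local archive and get the path of the extraction directory.
extracted_dir = cached_path('model.tar.gz', extract_archive=True)
# Resolve a single file inside the archive via the "!" syntax handled above.
weights_path = cached_path('model.tar.gz!weights.th', extract_archive=True)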
Example #36
    def __init__(self,
                 file_uri: str,
                 encoding: str = DEFAULT_ENCODING,
                 cache_dir: str = None) -> None:

        self.uri = file_uri
        self._encoding = encoding
        self._cache_dir = cache_dir
        self._archive_handle: Any = None  # only if the file is inside an archive

        main_file_uri, path_inside_archive = parse_embeddings_file_uri(
            file_uri)
        main_file_local_path = cached_path(main_file_uri, cache_dir=cache_dir)

        if zipfile.is_zipfile(main_file_local_path):  # ZIP archive
            self._open_inside_zip(main_file_uri, path_inside_archive)

        elif tarfile.is_tarfile(main_file_local_path):  # TAR archive
            self._open_inside_tar(main_file_uri, path_inside_archive)

        else:  # all the other supported formats, including uncompressed files
            if path_inside_archive:
                raise ValueError('Unsupported archive format: %s' %
                                 main_file_uri)

            # All the python packages for compressed files share the same interface of io.open
            extension = get_file_extension(main_file_uri)

            # Some systems don't have support for all of these libraries, so we import them only
            # when necessary.
            package = None
            if extension in ['.txt', '.vec']:
                package = io
            elif extension == '.gz':
                import gzip
                package = gzip
            elif extension == ".bz2":
                import bz2
                package = bz2
            elif extension == ".lzma":
                import lzma
                package = lzma

            if package is None:
                logger.warning(
                    'The embeddings file has an unknown file extension "%s". '
                    'We will assume the file is an (uncompressed) text file',
                    extension)
                package = io

            self._handle = package.open(main_file_local_path,
                                        'rt',
                                        encoding=encoding)  # type: ignore

        # To use this with tqdm we'd like to know the number of tokens. It's possible that the
        # first line of the embeddings file contains this: if it does, we want to start iteration
        # from the 2nd line, otherwise we want to start from the 1st.
        # Unfortunately, once we read the first line, we cannot move back the file iterator
        # because the underlying file may be "not seekable"; we use itertools.chain instead.
        first_line = next(self._handle)  # this moves the iterator forward
        self.num_tokens = EmbeddingsTextFile._get_num_tokens_from_first_line(
            first_line)
        if self.num_tokens:
            # the first line is a header line: start iterating from the 2nd line
            self._iterator = self._handle
        else:
            # the first line is not a header line: start iterating from the 1st line
            self._iterator = itertools.chain([first_line], self._handle)
Example #37
def dosub(sub):
    sub.set_processing_started()
    sub.save()
    print('Submission disk file:', sub.disk_file)

    if sub.disk_file is None:
        logmsg('Sub %i: retrieving URL' % (sub.id), sub.url)
        (fn, headers) = urllib.urlretrieve(sub.url)
        logmsg('Sub %i: wrote URL to file' % (sub.id), fn)
        df = DiskFile.from_file(fn, Image.ORIG_COLLECTION)
        logmsg('Created DiskFile', df)
        # Try to split the URL into a filename component and save it
        p = urlparse(sub.url)
        p = p.path
        if p:
            s = p.split('/')
            origname = s[-1]
            sub.original_filename = origname
        df.save()
        sub.disk_file = df
        sub.save()
        logmsg('Saved DiskFile', df)

    else:
        logmsg('uploaded disk file for this submission is ' + str(sub.disk_file))

    df = sub.disk_file
    fn = df.get_path()
    logmsg('DiskFile path ' + fn)

    original_filename = sub.original_filename
    # check if file is a gzipped file
    try:
        gzip_file = gzip.open(fn)
        f,tempfn = tempfile.mkstemp()
        os.close(f)
        f = open(tempfn,'wb')
        # should fail on the following line if not a gzip file
        f.write(gzip_file.read())
        f.close()
        gzip_file.close()
        df = DiskFile.from_file(tempfn, 'uploaded-gunzip')
        i = original_filename.find('.gz')
        if i != -1:
            original_filename = original_filename[:i]
        logmsg('extracted gzip file %s' % original_filename)
        #fn = tempfn
        fn = df.get_path()
    except:
        # not a gzip file
        pass

    is_tar = False
    try:
        is_tar = tarfile.is_tarfile(fn)
    except:
        pass
    if is_tar:
        logmsg('File %s: tarball' % fn)
        tar = tarfile.open(fn)
        dirnm = tempfile.mkdtemp()
        for tarinfo in tar.getmembers():
            if tarinfo.isfile():
                logmsg('extracting file %s' % tarinfo.name)
                tar.extract(tarinfo, dirnm)
                tempfn = os.path.join(dirnm, tarinfo.name)
                df = DiskFile.from_file(tempfn, 'uploaded-untar')
                # create Image object
                img = get_or_create_image(df)
                # create UserImage object.
                if img:
                    create_user_image(sub, img, tarinfo.name)
        tar.close()
        shutil.rmtree(dirnm, ignore_errors=True)
    else:
        # assume file is single image
        logmsg('File %s: single file' % fn)
        # create Image object
        img = get_or_create_image(df)
        logmsg('File %s: created Image %s' % (fn, str(img)))
        # create UserImage object.
        if img:
            logmsg('File %s: Image id %i' % (fn, img.id))
            uimg = create_user_image(sub, img, original_filename)
            logmsg('Image %i: created UserImage %i' % (img.id, uimg.id))

    sub.set_processing_finished()
    sub.save()
    return sub.id
Example #38
def read_part(file_path, size=DATA_MAX_BUFF, encoding=CODING_TYPE):
    'Read the file contents in segments.'
    if tarfile.is_tarfile(file_path):
        return read_tar_gz_file_part(file_path, size, encoding)
    return read_normal_file_part(file_path, size, encoding)
Example #39
def cached_path(
    url_or_filename,
    cache_dir=None,
    force_download=False,
    proxies=None,
    resume_download=False,
    user_agent=None,
    extract_compressed_file=False,
    force_extract=False,
    local_files_only=False,
) -> Optional[str]:
    """
    Given something that might be a URL (or might be a local path),
    determine which. If it's a URL, download the file and cache it, and
    return the path to the cached file. If it's already a local path,
    make sure the file exists and then return the path.
    Args:
        cache_dir: specify a cache directory to save the file to (overwrite the default cache dir).
        force_download: if True, re-download the file even if it's already cached in the cache dir.
        resume_download: if True, resume the download if an incompletely received file is found.
        user_agent: Optional string or dict that will be appended to the user-agent on remote requests.
        extract_compressed_file: if True and the path points to a zip or tar file, extract the compressed
            file in a folder alongside the archive.
        force_extract: if True when extract_compressed_file is True and the archive was already extracted,
            re-extract the archive and override the folder where it was extracted.

    Return:
        None in case of non-recoverable file (non-existent or inaccessible url + no cache on disk).
        Local path (string) otherwise
    """
    if cache_dir is None:
        cache_dir = TRANSFORMERS_CACHE
    if isinstance(url_or_filename, Path):
        url_or_filename = str(url_or_filename)
    if isinstance(cache_dir, Path):
        cache_dir = str(cache_dir)

    if is_remote_url(url_or_filename):
        # URL, so get it from the cache (downloading if necessary)
        output_path = get_from_cache(
            url_or_filename,
            cache_dir=cache_dir,
            force_download=force_download,
            proxies=proxies,
            resume_download=resume_download,
            user_agent=user_agent,
            local_files_only=local_files_only,
        )
    elif os.path.exists(url_or_filename):
        # File, and it exists.
        output_path = url_or_filename
    elif urlparse(url_or_filename).scheme == "":
        # File, but it doesn't exist.
        raise EnvironmentError("file {} not found".format(url_or_filename))
    else:
        # Something unknown
        raise ValueError("unable to parse {} as a URL or as a local path".format(url_or_filename))

    if extract_compressed_file:
        if not is_zipfile(output_path) and not tarfile.is_tarfile(output_path):
            return output_path

        # Path where we extract compressed archives
        # We avoid '.' in dir name and add "-extracted" at the end: "./model.zip" => "./model-zip-extracted/"
        output_dir, output_file = os.path.split(output_path)
        output_extract_dir_name = output_file.replace(".", "-") + "-extracted"
        output_path_extracted = os.path.join(output_dir, output_extract_dir_name)

        if os.path.isdir(output_path_extracted) and os.listdir(output_path_extracted) and not force_extract:
            return output_path_extracted

        # Prevent parallel extractions
        lock_path = output_path + ".lock"
        with FileLock(lock_path):
            shutil.rmtree(output_path_extracted, ignore_errors=True)
            os.makedirs(output_path_extracted)
            if is_zipfile(output_path):
                with ZipFile(output_path, "r") as zip_file:
                    zip_file.extractall(output_path_extracted)
            elif tarfile.is_tarfile(output_path):
                tar_file = tarfile.open(output_path)
                tar_file.extractall(output_path_extracted)
                tar_file.close()
            else:
                raise EnvironmentError("Archive format of {} could not be identified".format(output_path))

        return output_path_extracted

    return output_path
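
A hedged usage sketch for the helper above; the archive path is a placeholder, and the extraction directory name follows the replace-dots naming rule in the code.

# Resolve a local archive and extract it next to the file
# ("./model.tar.gz" is an illustrative path, not a real asset):
extracted_dir = cached_path("./model.tar.gz", extract_compressed_file=True)
# extracted_dir now points at "./model-tar-gz-extracted", per the naming rule above.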
Ejemplo n.º 40
0

##################################################
# tarfile
##################################################

'''
read tar files
'''
# https://pymotw.com/3/tarfile/index.html
# read/write access to Unix tar archives, including compressed files
import tarfile

for filename in ['README.txt', 'example.tar']:
    try:
        print('{:15} {}'.format(filename, tarfile.is_tarfile(filename)))
    except IOError as err:
        print('{:>15}  {}'.format(filename, err))


# read the names of files in an existing archive
import tarfile
with tarfile.open('example.tar', 'r') as t:
    print(t.getnames())
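

# read a member's contents without extracting to disk
# (illustrative addition in the same pymotw style as the snippets above;
#  assumes example.tar contains a README.txt member)
import tarfile

with tarfile.open('example.tar', 'r') as t:
    f = t.extractfile('README.txt')
    if f is not None:
        print(f.read())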


# metadata about the archive members
import tarfile
import time

with tarfile.open('example.tar', 'r') as t:
    for member_info in t.getmembers():
        print(member_info.name)
        print('  Modified:', time.ctime(member_info.mtime))
        print('  Size    :', member_info.size, 'bytes')
Ejemplo n.º 41
0
def can_decompress(file_path):
    return tarfile.is_tarfile(file_path) or zipfile.is_zipfile(file_path)
Ejemplo n.º 42
0
def _safe_is_tarfile(path):
    """safe version of is_tarfile, return False on IOError"""
    try:
        return tarfile.is_tarfile(path)
    except IOError:
        return False
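
A quick illustrative check of the wrapper above: a path that does not exist makes tarfile.is_tarfile raise an IOError/OSError, which the wrapper turns into False (the path below is a placeholder).

print(_safe_is_tarfile('/path/that/does/not/exist'))  # False instead of an exception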
Ejemplo n.º 43
0
    def update(self):
        """
        Downloads the latest source tarball from github and installs it over the existing version.
        """
        base_url = furl(self.repositoryBase)
        base_url.path.add(self.repository)
        base_url.path.add("tarball")
        base_url.path.add(self.branch)
        tar_download_url = base_url.url
        main_dir = os.path.dirname(os.path.dirname(__file__))

        try:
            self.backup()
            # prepare the update dir
            update_dir = os.path.join(main_dir, 'update')

            if os.path.isdir(update_dir):
                logger.info("Clearing out update folder " + update_dir +
                            " before extracting")
                shutil.rmtree(update_dir)

            logger.info("Creating update folder " + update_dir +
                        " before extracting")
            os.makedirs(update_dir)

            # retrieve file
            logger.info("Downloading update from " + repr(tar_download_url))
            tar_download_path = os.path.join(update_dir, 'sb-update.tar')
            urllib.urlretrieve(tar_download_url, tar_download_path)

            if not os.path.isfile(tar_download_path):
                logger.error("Unable to retrieve new version from " +
                             tar_download_url + ", can't update")
                return False

            if not tarfile.is_tarfile(tar_download_path):
                logger.error("Retrieved version from " + tar_download_url +
                             " is corrupt, can't update")
                return False

            # extract to sb-update dir
            logger.info("Extracting update file " + tar_download_path)
            tar = tarfile.open(tar_download_path)
            tar.extractall(update_dir)
            tar.close()

            # delete the downloaded archive
            logger.info("Deleting update file " + tar_download_path)
            os.remove(tar_download_path)

            # find update dir name
            update_dir_contents = [
                x for x in os.listdir(update_dir)
                if os.path.isdir(os.path.join(update_dir, x))
            ]
            if len(update_dir_contents) != 1:
                logger.error("Invalid update data, update failed: " +
                             str(update_dir_contents))
                return False
            content_dir = os.path.join(update_dir, update_dir_contents[0])

            dontUpdateThese = [
            ]  #("msvcm90.dll", "msvcr90.dll", "msvcm90.dll")
            #rename exes, pyd and dll files so they can be overwritten
            filesToRename = []
            for filename in os.listdir(main_dir):
                if (filename.endswith(".pyd") or filename.endswith(".dll")
                        or filename.endswith(".exe")
                    ) and filename not in dontUpdateThese:
                    filesToRename.append((filename, filename + ".updated"))
            logger.info("Renaming %d files so they can be overwritten" %
                        len(filesToRename))
            for toRename in filesToRename:
                logger.debug("Renaming %s to %s" % (toRename[0], toRename[1]))
                shutil.move(toRename[0], toRename[1])

            # walk temp folder and move files to main folder
            logger.info("Moving files from " + content_dir + " to " + main_dir)
            for dirname, dirnames, filenames in os.walk(content_dir):
                dirname = dirname[len(content_dir) + 1:]
                for curfile in filenames:
                    if curfile not in dontUpdateThese:
                        old_path = os.path.join(content_dir, dirname, curfile)
                        new_path = os.path.join(main_dir, dirname, curfile)
                        logger.debug("Updating %s" % curfile)
                        if os.path.isfile(new_path):
                            os.remove(new_path)
                        os.renames(old_path, new_path)
                    else:
                        logger.debug("Skipping %s" % curfile)

        except Exception as e:
            logger.error("Error while trying to update: " + str(e))
            return False
        logger.info("Update successful")
        return True
Ejemplo n.º 44
0
    def __init__(self,
                 config,
                 batch_size,
                 checkpoint_dir_or_path=None,
                 var_name_substitutions=None,
                 session_target='',
                 **sample_kwargs):
        if tf.gfile.IsDirectory(checkpoint_dir_or_path):
            checkpoint_path = tf.train.latest_checkpoint(
                checkpoint_dir_or_path)
        else:
            checkpoint_path = checkpoint_dir_or_path
        self._config = copy.deepcopy(config)
        self._config.hparams.batch_size = batch_size
        with tf.Graph().as_default():
            model = self._config.model
            model.build(self._config.hparams,
                        self._config.data_converter.output_depth,
                        is_training=False)
            # Input placeholders
            self._temperature = tf.placeholder(tf.float32, shape=())

            if self._config.hparams.z_size:
                self._z_input = tf.placeholder(
                    tf.float32,
                    shape=[batch_size, self._config.hparams.z_size])
            else:
                self._z_input = None

            if self._config.data_converter.control_depth > 0:
                self._c_input = tf.placeholder(
                    tf.float32,
                    shape=[None, self._config.data_converter.control_depth])
            else:
                self._c_input = None

            self._inputs = tf.placeholder(
                tf.float32,
                shape=[
                    batch_size, None, self._config.data_converter.input_depth
                ])
            self._controls = tf.placeholder(
                tf.float32,
                shape=[
                    batch_size, None, self._config.data_converter.control_depth
                ])
            self._inputs_length = tf.placeholder(
                tf.int32,
                shape=[batch_size] +
                list(self._config.data_converter.length_shape))
            self._max_length = tf.placeholder(tf.int32, shape=())
            # Outputs
            self._outputs, self._decoder_results = model.sample(
                batch_size,
                max_length=self._max_length,
                z=self._z_input,
                c_input=self._c_input,
                temperature=self._temperature,
                **sample_kwargs)
            if self._config.hparams.z_size:
                q_z = model.encode(self._inputs, self._inputs_length,
                                   self._controls)
                self._mu = q_z.loc
                self._sigma = q_z.scale.diag
                self._z = q_z.sample()

            var_map = None
            if var_name_substitutions is not None:
                var_map = {}
                for v in tf.global_variables():
                    var_name = v.name[:-2]  # Strip ':0' suffix.
                    for pattern, substitution in var_name_substitutions:
                        var_name = re.sub(pattern, substitution, var_name)
                    if var_name != v.name[:-2]:
                        tf.logging.info('Renaming `%s` to `%s`.', v.name[:-2],
                                        var_name)
                    var_map[var_name] = v

            # Restore graph
            self._sess = tf.Session(target=session_target)
            saver = tf.train.Saver(var_map)
            if (os.path.exists(checkpoint_path)
                    and tarfile.is_tarfile(checkpoint_path)):
                tf.logging.info('Unbundling checkpoint.')
                with tempfile.TemporaryDirectory() as temp_dir:
                    tar = tarfile.open(checkpoint_path)
                    tar.extractall(temp_dir)
                    # Assume only a single checkpoint is in the directory.
                    for name in tar.getnames():
                        if name.endswith('.index'):
                            checkpoint_path = os.path.join(
                                temp_dir, name[0:-6])
                            break
                    saver.restore(self._sess, checkpoint_path)
            else:
                saver.restore(self._sess, checkpoint_path)
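
The tar-unbundling branch above can be read in isolation. Below is a hedged standalone sketch of the same idea: given a bundled (tarred) checkpoint, recover the checkpoint prefix by locating the '.index' file. The function and variable names are illustrative, not part of the original code.

import os
import tarfile
import tempfile

def checkpoint_prefix_from_bundle(bundle_path):
    # Extract the bundle and return the checkpoint prefix (path minus '.index'),
    # assuming a single checkpoint lives inside the archive, as the code above does.
    temp_dir = tempfile.mkdtemp()
    with tarfile.open(bundle_path) as tar:
        tar.extractall(temp_dir)
        for name in tar.getnames():
            if name.endswith('.index'):
                return os.path.join(temp_dir, name[:-len('.index')])
    return None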
Ejemplo n.º 45
0
    def update(self):
        """
        Downloads the latest source tarball from github and installs it over the existing version.
        """
        base_url = furl(self.repositoryBase)
        base_url.path.add(self.repository)
        base_url.path.add("tarball")
        base_url.path.add(self.branch)
        tar_download_url = base_url.url
        main_dir = os.path.dirname(os.path.dirname(__file__))

        try:
            self.backup()

            # prepare the update dir
            update_dir = os.path.join(main_dir, 'update')

            if os.path.isdir(update_dir):
                logger.info("Clearing out update folder " + update_dir +
                            " before extracting")
                shutil.rmtree(update_dir)

            logger.info("Creating update folder " + update_dir +
                        " before extracting")
            os.makedirs(update_dir)

            # retrieve file
            logger.info("Downloading update from " + repr(tar_download_url))
            tar_download_path = os.path.join(update_dir, 'sb-update.tar')
            response = requests.get(
                tar_download_url, stream=True, verify=False
            )  # Apparently SSL causes problems on some systems (#138)
            with open(tar_download_path, 'wb') as out_file:
                shutil.copyfileobj(response.raw, out_file)
            del response

            if not os.path.isfile(tar_download_path):
                logger.error("Unable to retrieve new version from " +
                             tar_download_url + ", can't update")
                return False

            if not tarfile.is_tarfile(tar_download_path):
                logger.error("Retrieved version from " + tar_download_url +
                             " is corrupt, can't update")
                return False

            # extract to sb-update dir
            logger.info("Extracting update file " + tar_download_path)
            tar = tarfile.open(tar_download_path)
            tar.extractall(update_dir)
            tar.close()

            # delete the downloaded archive
            logger.info("Deleting update file " + tar_download_path)
            os.remove(tar_download_path)

            # find update dir name
            update_dir_contents = [
                x for x in os.listdir(update_dir)
                if os.path.isdir(os.path.join(update_dir, x))
            ]
            if len(update_dir_contents) != 1:
                logger.error("Invalid update data, update failed: " +
                             str(update_dir_contents))
                return False
            content_dir = os.path.join(update_dir, update_dir_contents[0])

            # walk temp folder and move files to main folder
            logger.info("Moving files from " + content_dir + " to " + main_dir)
            for dirname, dirnames, filenames in os.walk(content_dir):
                dirname = dirname[len(content_dir) + 1:]
                for curfile in filenames:
                    old_path = os.path.join(content_dir, dirname, curfile)
                    new_path = os.path.join(main_dir, dirname, curfile)

                    if os.path.isfile(new_path):
                        os.remove(new_path)
                    os.renames(old_path, new_path)

        except Exception as e:
            logger.error("Error while trying to update: " + str(e))
            return False
        logger.info("Update successful")
        return True
Ejemplo n.º 46
0
def install(src, dest):
    """Install a zip, exe, tar.gz, tar.bz2 or dmg file, and return the path of
    the installation folder.

    :param src: Path to the install file
    :param dest: Path to install to (to ensure we do not overwrite any existent
                 files the folder should not exist yet)
    """
    if not is_installer(src):
        msg = "{} is not a valid installer file".format(src)
        if '://' in src:
            try:
                return _install_url(src, dest)
            except Exception:
                exc, val, tb = sys.exc_info()
                msg = "{} ({})".format(msg, val)
                reraise(InvalidSource, msg, tb)
        raise InvalidSource(msg)

    src = os.path.realpath(src)
    dest = os.path.realpath(dest)

    did_we_create = False
    if not os.path.exists(dest):
        did_we_create = True
        os.makedirs(dest)

    trbk = None
    try:
        install_dir = None
        if src.lower().endswith('.dmg'):
            install_dir = _install_dmg(src, dest)
        elif src.lower().endswith('.exe'):
            install_dir = _install_exe(src, dest)
        elif zipfile.is_zipfile(src) or tarfile.is_tarfile(src):
            install_dir = mozfile.extract(src, dest)[0]

        return install_dir

    except BaseException:
        cls, exc, trbk = sys.exc_info()
        if did_we_create:
            try:
                # try to uninstall this properly
                uninstall(dest)
            except Exception:
                # uninstall may fail, let's just try to clean the folder
                # in this case
                try:
                    mozfile.remove(dest)
                except Exception:
                    pass
        if issubclass(cls, Exception):
            error = InstallError('Failed to install "%s (%s)"' % (src, str(exc)))
            reraise(InstallError, error, trbk)
        # any other kind of exception like KeyboardInterrupt is just re-raised.
        reraise(cls, exc, trbk)

    finally:
        # trbk won't get GC'ed due to circular reference
        # http://docs.python.org/library/sys.html#sys.exc_info
        del trbk
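
The snippet relies on an is_installer() helper that is not shown. Purely as an assumption consistent with the dispatch above, such a check might accept .dmg/.exe by extension and zip/tar archives by content; the helper below is a hypothetical sketch, not the library's actual API.

import tarfile
import zipfile

def is_installer_sketch(src):
    # Hypothetical stand-in for the missing is_installer() helper.
    lowered = src.lower()
    if lowered.endswith('.dmg') or lowered.endswith('.exe'):
        return True
    return zipfile.is_zipfile(src) or tarfile.is_tarfile(src)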
Ejemplo n.º 47
0
    def is_tarfile(self):
        try:
            return tarfile.is_tarfile(self.archive_path)
        except Exception:
            return False
Ejemplo n.º 48
0
    def populate(self):
        logger.info("Preprocessing dataset")

        was_extracted = False
        if len(self.filenames) > 0:
            file_path = os.path.join(self.save_path, self.filenames[0])
            if not os.path.exists(file_path[:-7]):  # nothing extracted yet
                if tarfile.is_tarfile(file_path):
                    logger.info("Extracting tar file")
                    tar = tarfile.open(file_path, "r:gz")
                    tar.extractall(path=self.save_path)
                    was_extracted = True
                    tar.close()

        # get exact path of the extract, for robustness to changes in the 10X storage logic
        path_to_data, suffix = self.find_path_to_data()

        # get filenames, according to 10X storage logic
        measurements_filename = "genes.tsv" if suffix == "" else "features.tsv.gz"
        barcode_filename = "barcodes.tsv" + suffix

        matrix_filename = "matrix.mtx" + suffix
        expression_data = sp_io.mmread(os.path.join(path_to_data, matrix_filename)).T
        if self.dense:
            expression_data = expression_data.A
        else:
            expression_data = csr_matrix(expression_data)

        # group measurements by type (e.g gene, protein)
        # in case there are multiple measurements, e.g protein
        # they are indicated in the third column
        gene_expression_data = expression_data
        measurements_info = pd.read_csv(
            os.path.join(path_to_data, measurements_filename), sep="\t", header=None
        )
        Ys = None
        if measurements_info.shape[1] < 3:
            gene_names = measurements_info[self.measurement_names_column].astype(np.str)
        else:
            gene_names = None
            for measurement_type in np.unique(measurements_info[2]):
                # .values required to work with sparse matrices
                measurement_mask = (measurements_info[2] == measurement_type).values
                measurement_data = expression_data[:, measurement_mask]
                measurement_names = measurements_info[self.measurement_names_column][
                    measurement_mask
                ].astype(np.str)
                if measurement_type == "Gene Expression":
                    gene_expression_data = measurement_data
                    gene_names = measurement_names
                else:
                    Ys = [] if Ys is None else Ys
                    if measurement_type == "Antibody Capture":
                        measurement_type = "protein_expression"
                        columns_attr_name = "protein_names"
                        # protein counts do not have many zeros so always make dense
                        if self.dense is not True:
                            measurement_data = measurement_data.A
                    else:
                        measurement_type = measurement_type.lower().replace(" ", "_")
                        columns_attr_name = measurement_type + "_names"
                    measurement = CellMeasurement(
                        name=measurement_type,
                        data=measurement_data,
                        columns_attr_name=columns_attr_name,
                        columns=measurement_names,
                    )
                    Ys.append(measurement)
            if gene_names is None:
                raise ValueError(
                    "When loading measurements, no 'Gene Expression' category was found."
                )

        batch_indices, cell_attributes_dict = None, None
        if os.path.exists(os.path.join(path_to_data, barcode_filename)):
            barcodes = pd.read_csv(
                os.path.join(path_to_data, barcode_filename), sep="\t", header=None
            )
            cell_attributes_dict = {
                "barcodes": np.squeeze(np.asarray(barcodes, dtype=str))
            }
            # As of 07/01, 10X barcodes have format "%s-%d" where the digit is a batch index starting at 1
            batch_indices = np.asarray(
                [barcode.split("-")[-1] for barcode in cell_attributes_dict["barcodes"]]
            )
            batch_indices = batch_indices.astype(np.int64) - 1

        logger.info("Finished preprocessing dataset")

        self.populate_from_data(
            X=gene_expression_data,
            batch_indices=batch_indices,
            gene_names=gene_names,
            cell_attributes_dict=cell_attributes_dict,
            Ys=Ys,
        )
        self.filter_cells_by_count()

        # cleanup if required
        if was_extracted and self.remove_extracted_data:
            logger.info("Removing extracted data at {}".format(file_path[:-7]))
            shutil.rmtree(file_path[:-7])
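
As a small worked example of the barcode-to-batch-index step in populate() above (the barcodes are made up): a 10X barcode of the form "%s-%d" carries a 1-based batch suffix that becomes a 0-based batch index.

import numpy as np

barcodes = np.array(["AAACCCAAGAAACACT-1", "TTTGGTTGTCGCATAT-2"])
batch_indices = np.asarray([b.split("-")[-1] for b in barcodes]).astype(np.int64) - 1
print(batch_indices)  # [0 1]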
Ejemplo n.º 49
0
    def evidencetype(self, evidence, sspath):
        # take initial hash of file/dir
        Tools().inithash(self.case_name, self.secure_store_location,
                         self.evidence)
        # split filename from path set as method variable
        filename = evidence.split("/")[-1]
        # check if file or dir
        if os.path.isfile(evidence):
            # copy file to securestore
            os.system(f"sudo cp {evidence} {sspath}/image")
            print("copying to image folder in securestore...")
            # check file type of evidence
            imagefile = f"{sspath}/image/{filename}"
            ftype = ""
            if not tarfile.is_tarfile(evidence) and not zipfile.is_zipfile(
                    evidence):
                # copy to logical to work with file
                os.system(
                    f"sudo cp {sspath}/image/{filename} {sspath}/logical")
                logfile = f"{sspath}/logical/{filename}"
                # carve out file type from linux "file" query
                file_out = subprocess.check_output(["file",
                                                    logfile]).decode("utf-8")
                ftype = file_out.split(":")[1].split(",")[0].split()[0].upper()
                # open file_type csv in config_files to find filetype
                file = open(
                    os.path.dirname(os.path.abspath("file_type.csv")) +
                    "/config_files/file_type.csv", "r")
                for line in file:
                    ft = line.split(",")[0]
                    filedescription = line.split(",")[1]
                    if ft.upper() == ftype.upper():
                        print("Looks like you're working with a " + filename +
                              ": " + filedescription)
                        break
                    elif (ft.upper() in ftype.upper()
                          ) and not (ft.upper() == ftype.upper()):
                        print("Possible match to filetype is: " + filename +
                              ": " + filedescription)
                        break
                    elif filetype.guess(logfile) is not None:
                        print('File extension: %s' %
                              filetype.guess(logfile).extension)
                        print('File MIME type: %s' %
                              filetype.guess(logfile).mime)
                        break
                    else:
                        print(
                            "This file type is unknown, autofor will still try to examine it"
                        )
                        break
                # hash for files
                Tools().hash(self.case_name, f"{sspath}/logical/{filename}",
                             self.secure_store_location)

            # check if disk image file
            if "DOS/MBR" in ftype:
                print(
                    f"Looks like {filename} is a disk image, Autofor will mount it for you..."
                )
                fsplit = file_out.split(" ")
                if "startsector" in fsplit:
                    offset = int(
                        fsplit[(fsplit.index("startsector") + 1)][:-1]) * 512
                    # the try/except below is meant to handle the mount directory already existing
                else:
                    offset = 0
                # create mount location with max 10 characters for so not too messy
                fname = logfile.split("/")[-1][:10]
                try:
                    print("Making mount directory...")
                    os.system(f"sudo mkdir /mnt/{fname}")
                except FileExistsError:
                    print(
                        "Looks like this mount point already exists... Mounting..."
                    )
                # mount disk image. Haven't used exceptions
                os.system(
                    f"sudo mount -o ro,loop,offset={offset} {logfile} /mnt/{fname}"
                )
                self.mount_point = f"/mnt/{fname}"
                # update config json
                self.configupdate()
                # success
                print("Mount Complete!")
                # hash all files in mount point
                Tools().hash(self.case_name, self.mount_point,
                             self.secure_store_location)

            # handling compressed images
            elif tarfile.is_tarfile(imagefile):
                print(f"Looks like {filename} is a tarfile")
                print(f"extracting to {sspath}/logical")
                # print(imagefile)
                f = tarfile.open(imagefile)
                f.extractall(path=f'{sspath}/logical')
                f.close()
                print("Extraction complete")

                Tools().hash(self.case_name, f"{sspath}/logical/{filename}",
                             self.secure_store_location)

            elif zipfile.is_zipfile(imagefile):
                print(f"Looks like {filename} is a zipfile")
                print(f"extracting to {sspath}/logical")
                f = zipfile.ZipFile(imagefile)
                f.extractall(path=f'{sspath}/logical')
                print("Extraction complete")
                f.close()

                Tools().hash(self.case_name, f"{sspath}/logical",
                             self.secure_store_location)

        elif os.path.isdir(evidence):
            # zip directory to image folder in securestore
            print(f"{filename} is a directory")
            print(f"{filename} will be compressed to {sspath}/image")
            print(
                f"{filename} will then be extracted to {sspath}/logical/{filename} for analysis"
            )
            """shutil makes archiving dirs easier. No issues with path confusion that
            can be experienced with using zipfile and having to iterate through
            all of the files and subdirs in the root dir"""
            shutil.make_archive(f"{sspath}/image/{filename}", 'zip', evidence)
            # extract from image to logical
            with zipfile.ZipFile(
                    f'{sspath}/image/{filename.split("/")[-1]}.zip',
                    'r') as zip:
                # make a directory in logical in case files sit at the root of the archive
                os.system(f"mkdir {sspath}/logical/{filename}")
                zip.extractall(f"{sspath}/logical/{filename}")

            # Take second hash of logical output for dir
            Tools().hash(self.case_name, f"{sspath}/logical/{filename}",
                         self.secure_store_location)
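
A one-line arithmetic check of the mount-offset computation used in evidencetype() above: the partition's start sector reported by `file` is multiplied by the 512-byte sector size (the start-sector value here is illustrative).

start_sector = 2048                 # value parsed from the `file` output, illustrative
offset = start_sector * 512         # byte offset passed to mount -o loop,offset=...
print(offset)                       # 1048576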
Ejemplo n.º 50
0
def filetype(filename):
    """Try to guess the type of the file."""
    if os.path.isdir(filename):
        # Potentially a BundleTrajectory
        if BundleTrajectory.is_bundle(filename):
            return 'bundle'
        else:
            raise IOError('Directory: ' + filename)

    fileobj = open(filename)
    s3 = fileobj.read(3)
    if len(s3) == 0:
        raise IOError('Empty file: ' + filename)

    if filename.lower().endswith('.db') or filename.lower().endswith('.cmr'):
        return 'db'

    if is_tarfile(filename):
        return 'gpw'

    if s3 == 'CDF':
        from ase.io.pupynere import NetCDFFile
        nc = NetCDFFile(filename)
        if 'number_of_dynamic_atoms' in nc.dimensions:
            return 'dacapo'

        history = nc.history
        if history == 'GPAW restart file':
            return 'gpw-nc'
        if history == 'ASE trajectory':
            return 'nc'
        if history == 'Dacapo':
            return 'dacapo'
        if hasattr(nc, 'file_format') and nc.file_format.startswith('ETSF'):
            return 'etsf'
        raise IOError('Unknown netCDF file!')

    if is_zipfile(filename):
        return 'vnl'

    fileobj.seek(0)
    lines = fileobj.readlines(1000)

    if lines[0].startswith('PickleTrajectory'):
        return 'traj'

    if lines[1].startswith('OUTER LOOP:') or filename.lower().endswith(
            '.cube'):
        return 'cube'

    if '  ___ ___ ___ _ _ _  \n' in lines:
        return 'gpaw-text'

    if (' &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n'
            in lines[:90]):
        return 'dacapo-text'

    for line in lines:
        if line[0] != '#':
            word = line.strip()
            if word in ['ANIMSTEPS', 'CRYSTAL', 'SLAB', 'POLYMER', 'MOLECULE']:
                return 'xsf'

    filename_v = os.path.basename(filename)
    if 'POSCAR' in filename_v or 'CONTCAR' in filename_v:
        return 'vasp'

    if 'OUTCAR' in filename_v:
        return 'vasp_out'

    if filename.lower().endswith('.exi'):
        return 'exi'

    if filename.lower().endswith('.mol'):
        return 'mol'

    if filename.lower().endswith('.pdb'):
        return 'pdb'

    if filename.lower().endswith('.cif'):
        return 'cif'

    if filename.lower().endswith('.struct'):
        return 'struct'

    if filename.lower().endswith('.struct_out'):
        return 'struct_out'

    for line in lines:
        if 'Invoking FHI-aims ...' in line:
            return 'aims_out'
        if 'atom' in line:
            data = line.split()
            try:
                a = Atoms(symbols=[data[4]],
                          positions=[[
                              float(data[1]),
                              float(data[2]),
                              float(data[3])
                          ]])
                return 'aims'
            except:
                pass

    if filename.lower().endswith('.in'):
        return 'aims'

    if filename.lower().endswith('.cfg'):
        return 'cfg'

    if os.path.split(filename)[1] == 'atoms.dat':
        return 'iwm'

    if filename.endswith('I_info'):
        return 'Cmdft'

    if lines[0].startswith('$coord') or os.path.basename(filename) == 'coord':
        return 'tmol'

    if lines[0].startswith('$grad') or os.path.basename(
            filename) == 'gradient':
        return 'tmol-gradient'

    if lines[0].startswith('Geometry'):
        return 'dftb'

    if filename.lower().endswith('.geom'):
        return 'castep_geom'

    if filename.lower().endswith('.castep'):
        return 'castep'

    if filename.lower().endswith('.cell'):
        return 'castep_cell'
    if s3 == '<?x':
        from ase.io.vtkxml import probe_vtkxml
        xmltype = probe_vtkxml(filename)
        if xmltype == 'ImageData':
            return 'vti'
        elif xmltype == 'StructuredGrid':
            return 'vts'
        elif xmltype == 'UnstructuredGrid':
            return 'vtu'
        elif xmltype is not None:
            raise IOError('Unknown VTK XML file!')

    if filename.lower().endswith('.sdf'):
        return 'sdf'

    if filename.lower().endswith('.gen'):
        return 'gen'

    if 'ITEM: TIMESTEP\n' in lines:
        return 'lammps'

    return 'xyz'
Ejemplo n.º 51
0
def configure_sopcast(latest_version):
    # Configuration for LINUX
    if xbmc.getCondVisibility(
            'system.platform.linux'
    ) and not xbmc.getCondVisibility('system.platform.Android'):
        print("Detected OS: Linux")
        # Linux Armv
        if "arm" in os.uname()[4]:
            print("Sopcast Configuration - LINUX ARM")
            if settings.getSetting('rpi2') == "true":
                print("Raspberry PI 2")
                SPSC_KIT = os.path.join(addonpath,
                                        sopcast_raspberry.split("/")[-1])
                download_tools().Downloader(sopcast_raspberry, SPSC_KIT,
                                            translate(30076), translate(30000))
                if tarfile.is_tarfile(SPSC_KIT):
                    path_libraries = os.path.join(pastaperfil, "sopcast")
                    download_tools().extract(SPSC_KIT, path_libraries)
                    xbmc.sleep(500)
                    download_tools().remove(SPSC_KIT)
                if latest_version:
                    settings.setSetting('sopcast_version',
                                        value=latest_version)
                return

        elif os.uname()[4] == "x86_64":
            generic = False
            if settings.getSetting('openelecx86_64') == "true":
                print("Detected OpenELEC x86_64")
                SPSC_KIT = os.path.join(addonpath,
                                        openelecx86_64_sopcast.split("/")[-1])
                download_tools().Downloader(openelecx86_64_sopcast, SPSC_KIT,
                                            translate(30076), translate(30000))
                if tarfile.is_tarfile(SPSC_KIT):
                    download_tools().extract(SPSC_KIT, pastaperfil)
                    xbmc.sleep(500)
                    download_tools().remove(SPSC_KIT)
                if latest_version:
                    settings.setSetting('sopcast_version',
                                        value=latest_version)
                return
            else:
                generic = True
        elif os.uname()[4] == "i386" or os.uname()[4] == "i686":
            generic = False
            if settings.getSetting('openeleci386') == "true":
                SPSC_KIT = os.path.join(addonpath,
                                        openelecxi386_sopcast.split("/")[-1])
                download_tools().Downloader(openelecxi386_sopcast, SPSC_KIT,
                                            translate(30076), translate(30000))
                if tarfile.is_tarfile(SPSC_KIT):
                    download_tools().extract(SPSC_KIT, pastaperfil)
                    xbmc.sleep(500)
                    download_tools().remove(SPSC_KIT)
                if latest_version:
                    settings.setSetting('sopcast_version',
                                        value=latest_version)
                return
            else:
                generic = True
        if generic:
            SPSC_KIT = os.path.join(addonpath,
                                    sopcast_linux_generico.split("/")[-1])
            download_tools().Downloader(sopcast_linux_generico, SPSC_KIT,
                                        translate(30076), translate(30000))
            if tarfile.is_tarfile(SPSC_KIT):
                path_libraries = os.path.join(pastaperfil, "sopcast")
                download_tools().extract(SPSC_KIT, path_libraries)
                xbmc.sleep(500)
                download_tools().remove(SPSC_KIT)
            # set every single file from the bundle as executable
            path_libraries = os.path.join(pastaperfil, "sopcast")
            dirs, files = xbmcvfs.listdir(path_libraries)
            for ficheiro in files:
                binary_path = os.path.join(path_libraries, ficheiro)
                st = os.stat(binary_path)
                import stat
                os.chmod(binary_path, st.st_mode | stat.S_IEXEC)
            path_libraries = os.path.join(path_libraries, "lib")
            dirs, files = xbmcvfs.listdir(path_libraries)
            for ficheiro in files:
                binary_path = os.path.join(path_libraries, ficheiro)
                st = os.stat(binary_path)
                import stat
                os.chmod(binary_path, st.st_mode | stat.S_IEXEC)
            if latest_version:
                settings.setSetting('sopcast_version', value=latest_version)
            return

    elif xbmc.getCondVisibility('system.platform.windows'):
        print("Detected OS: Windows")
        if not xbmcvfs.exists(pastaperfil):
            xbmcvfs.mkdir(pastaperfil)
        # Sop
        import ctypes
        is_admin = ctypes.windll.shell32.IsUserAnAdmin() != 0
        if not is_admin:
            mensagemok(translate(30000), translate(30077), translate(30078))
        else:
            cmd = ['sc', 'delete', 'sopcastp2p']
            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
            for line in proc.stdout:
                print("cmd out: " + line.rstrip())
            xbmc.sleep(1000)
            ret = mensagemprogresso.create(translate(30000), translate(30000))
            mensagemprogresso.update(0, translate(30117), "  ")
            xbmc.sleep(1000)
            import _winreg
            aReg = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE)
            try:
                aKey = _winreg.OpenKey(aReg,
                                       r'SOFTWARE\SopCast\Player\InstallPath',
                                       0, _winreg.KEY_READ)
                name, value, type = _winreg.EnumValue(aKey, 0)
                sopcast_executable = value
                print("Installation executable of sopcast was found: " +
                      sopcast_executable)
                _winreg.CloseKey(aKey)
                mensagemprogresso.update(10, translate(30079),
                                         translate(30080))
            except:
                sopcast_executable = ""
                mensagemok(translate(30000), translate(30081),
                           translate(30082))
            if not sopcast_executable:
                pass
            else:
                xbmc.sleep(1000)
                mensagemprogresso.update(20, translate(30083), "  ")
                xbmc.sleep(1000)
                print("Getting windows users IDS")
                aReg = _winreg.ConnectRegistry(None,
                                               _winreg.HKEY_LOCAL_MACHINE)
                aKey = _winreg.OpenKey(
                    aReg,
                    r'SOFTWARE\Microsoft\Windows NT\CurrentVersion\ProfileList'
                )
                users = []
                for i in range(1024):
                    try:
                        asubkey = _winreg.EnumKey(aKey, i)
                        print(asubkey)
                        aKeydois = _winreg.OpenKey(
                            aReg,
                            os.path.join(
                                'SOFTWARE\Microsoft\Windows NT\CurrentVersion\ProfileList',
                                asubkey))
                        val = _winreg.QueryValueEx(aKeydois,
                                                   "ProfileImagePath")
                        try:
                            print(val[0])
                        except:
                            print(
                                "Notice: User with strange characters, print cmd ignored."
                            )
                        if "Windows" in val[0] or "%systemroot%" in val[0]:
                            pass
                        else:
                            users.append(asubkey)
                    except:
                        pass
                if not users:
                    mensagemok(translate(30000), translate(30084))
                else:
                    mensagemprogresso.update(30, translate(30085),
                                             translate(30080))
                    xbmc.sleep(200)
                    mensagemprogresso.update(30, translate(30086), "   ")
                    xbmc.sleep(1000)
                    print("System Users", users)
                    srvany_final_location = os.path.join(
                        sopcast_executable.replace("SopCast.exe", ""),
                        "srvany.exe")
                    srvany_download_location = os.path.join(
                        addonpath, "srvany.exe")
                    srvanytgz_download_location = os.path.join(
                        addonpath, "srvany.tar.gz")
                    download_tools().Downloader(srvany_executable,
                                                srvanytgz_download_location,
                                                translate(30087),
                                                translate(30000))
                    xbmc.sleep(1000)
                    if tarfile.is_tarfile(srvanytgz_download_location):
                        path_libraries = addonpath
                        download_tools().extract(srvanytgz_download_location,
                                                 path_libraries)
                        xbmcvfs.copy(srvany_download_location,
                                     srvany_final_location)
                        download_tools().remove(srvanytgz_download_location)
                        download_tools().remove(srvany_download_location)
                    xbmc.sleep(1000)
                    ret = mensagemprogresso.create(translate(30000),
                                                   translate(30000))
                    xbmc.sleep(200)
                    mensagemprogresso.update(35, translate(30088), "  ")
                    xbmc.sleep(1000)
                    cmd = [
                        'sc', 'create', 'sopcastp2p', 'binpath=',
                        os.path.join(
                            os.path.join(
                                sopcast_executable.replace("SopCast.exe", "")),
                            'srvany.exe')
                    ]
                    proc = subprocess.Popen(cmd,
                                            stdout=subprocess.PIPE,
                                            shell=True)
                    servicecreator = False
                    for line in proc.stdout:
                        print("cmd out: " + line.rstrip())
                        servicecreator = True
                    if not servicecreator:
                        mensagemok(translate(30000), translate(30089))
                    else:
                        mensagemprogresso.update(40, translate(30088),
                                                 translate(30080))
                        xbmc.sleep(1000)
                        mensagemprogresso.update(45, translate(30090), "  ")
                        xbmc.sleep(1000)
                        print("Trying to modify regedit....")
                        try:
                            aReg = _winreg.ConnectRegistry(
                                None, _winreg.HKEY_LOCAL_MACHINE)
                            key = _winreg.CreateKey(
                                aReg,
                                r'SYSTEM\CurrentControlSet\Services\sopcastp2p\Parameters'
                            )
                            _winreg.SetValueEx(
                                key, 'AppDirectory', 0, _winreg.REG_SZ,
                                os.path.join(
                                    sopcast_executable.replace(
                                        "SopCast.exe", "")))
                            _winreg.SetValueEx(
                                key, 'Application', 0, _winreg.REG_SZ,
                                os.path.join(
                                    os.path.join(
                                        sopcast_executable.replace(
                                            "SopCast.exe", "")),
                                    "SopCast.exe"))
                            _winreg.SetValueEx(key, 'AppParameters', 0,
                                               _winreg.REG_SZ, "sop://")
                            mensagemprogresso.update(50, translate(30090),
                                                     translate(30080))
                            regedit = True
                        except:
                            mensagemok(translate(30000), translate(30091))
                            regedit = False
                        if not regedit:
                            pass
                        else:
                            xbmc.sleep(1000)
                            mensagemprogresso.update(50, translate(30092),
                                                     "   ")
                            cmd = ['sc', 'sdshow', 'sopcastp2p']
                            proc = subprocess.Popen(cmd,
                                                    stdout=subprocess.PIPE,
                                                    shell=True)
                            lines = []
                            for line in proc.stdout:
                                print(line.rstrip())
                                if line.rstrip() != "" and "(" in line.rstrip(
                                ):
                                    lines.append(line.rstrip())
                                else:
                                    pass
                            if len(lines) != 1:
                                mensagemok(translate(30000), translate(30093))
                            else:
                                linha_arr = []
                                for user in users:
                                    linha_arr.append('(A;;RPWPCR;;;' + user +
                                                     ')')
                                linha_add = ''
                                for linha in linha_arr:
                                    linha_add += linha
                                print("line piece to add: " + linha_add)
                                linha_final = lines[0].replace(
                                    "S:(", linha_add + "S:(")
                                print("Final line: " + linha_final)
                                permissions = False
                                xbmc.sleep(500)
                                mensagemprogresso.update(
                                    60, translate(30092), translate(30080))
                                xbmc.sleep(500)
                                mensagemprogresso.update(
                                    60, translate(30094), "   ")
                                cmd = [
                                    'sc', 'sdset', 'sopcastp2p', linha_final
                                ]
                                proc = subprocess.Popen(cmd,
                                                        stdout=subprocess.PIPE,
                                                        shell=True)
                                for line in proc.stdout:
                                    print(line.rstrip())
                                    permissions = True
                                if not permissions:
                                    mensagemok(translate(30000),
                                               translate(30095))
                                else:
                                    mensagemprogresso.update(
                                        70, translate(30094), translate(30080))
                                    xbmc.sleep(1000)
                                    mensagemprogresso.update(
                                        70, translate(30096), "   ")
                                    print(
                                        "Trying to set sopcastp2p service regedit permissions..."
                                    )
                                    download_tools().Downloader(
                                        srvany_permissions,
                                        os.path.join(
                                            pastaperfil,
                                            "sopcastp2p-permissions.txt"),
                                        translate(30097), translate(30000))
                                    xbmc.sleep(500)
                                    ret = mensagemprogresso.create(
                                        translate(30000), translate(30000))
                                    xbmc.sleep(500)
                                    mensagemprogresso.update(
                                        80, translate(30098), "   ")
                                    xbmc.sleep(1000)
                                    cmd = [
                                        'regini',
                                        os.path.join(
                                            pastaperfil,
                                            "sopcastp2p-permissions.txt")
                                    ]
                                    proc = subprocess.Popen(
                                        cmd,
                                        stdout=subprocess.PIPE,
                                        shell=True)
                                    for line in proc.stdout:
                                        print(line.rstrip())
                                    mensagemprogresso.update(
                                        90, translate(30098), translate(30098))
                                    mensagemprogresso.update(
                                        100, translate(30099), "   ")
                                    xbmc.sleep(2000)
                                    mensagemprogresso.close()
                                    if latest_version:
                                        settings.setSetting(
                                            'sopcast_version',
                                            value=latest_version)
                                    return

    elif xbmc.getCondVisibility('System.Platform.OSX'):
        print("Detected OS: Mac OSX")
        available = False
        if os.uname()[-1] == "x86_64":
            mac_package = osx_x64_sopcast
            available = True
        elif os.uname()[-1] == "i386":
            mac_package = osx_i386_sopcast
            available = True
        else:
            available = False
        if available:
            if not os.path.exists(pastaperfil):
                xbmcvfs.mkdir(pastaperfil)
            MAC_KIT = os.path.join(addonpath, mac_package.split("/")[-1])
            download_tools().Downloader(mac_package, MAC_KIT, translate(30076),
                                        translate(30000))
            if tarfile.is_tarfile(MAC_KIT):
                path_libraries = os.path.join(pastaperfil)
                download_tools().extract(MAC_KIT, pastaperfil)
                download_tools().remove(MAC_KIT)
                sp_sc_auth = os.path.join(pastaperfil, "sopcast", "sp-sc-auth")
                st = os.stat(sp_sc_auth)
                import stat
                os.chmod(sp_sc_auth, st.st_mode | stat.S_IEXEC)
            if latest_version:
                settings.setSetting('sopcast_version', value=latest_version)
            return
        else:
            mensagemok(translate(30000), translate(30100))
            return

    elif xbmc.getCondVisibility('System.Platform.Android'):

        print("Detected OS: Android")
        # Sopcast configuration
        print("Starting SopCast Configuration")

        # Moving sopclient to ext4 hack - tks steeve from xbmctorrent

        sopclient_builtin_location = os.path.join(addonpath, "resources",
                                                  "binaries", "sopclient")

        # Hack to get current xbmc app id
        xbmcfolder = xbmc.translatePath(addonpath).split("/")

        found = False
        if settings.getSetting('auto_appid') == 'true':
            i = 0
            sopcast_installed = False
            for folder in xbmcfolder:
                if folder.count('.') >= 2 and folder != addon_id:
                    found = True
                    break
                else:
                    i += 1
            if found:
                uid = os.getuid()
                app_id = xbmcfolder[i]
        else:
            if settings.getSetting('custom_appid') != '':
                uid = os.getuid()
                app_id = settings.getSetting('custom_appid')
                found = True

        if found:
            xbmc_data_path = os.path.join("/data", "data", app_id)

            if os.path.exists(xbmc_data_path) and uid == os.stat(
                    xbmc_data_path).st_uid:
                android_binary_dir = os.path.join(xbmc_data_path, "files",
                                                  "program.plexus")
                if not os.path.exists(android_binary_dir):
                    os.makedirs(android_binary_dir)
                android_binary_path = os.path.join(android_binary_dir,
                                                   "sopclient")
                if not os.path.exists(android_binary_path) or os.path.getsize(
                        android_binary_path) != os.path.getsize(
                            sopclient_builtin_location):
                    shutil.copy2(sopclient_builtin_location,
                                 android_binary_path)
                binary_path = android_binary_path
                st = os.stat(binary_path)
                import stat
                os.chmod(binary_path, st.st_mode | stat.S_IEXEC)
                settings.setSetting('android_sopclient', value=binary_path)
                opcao = xbmcgui.Dialog().yesno(translate(30000),
                                               translate(30101),
                                               translate(30103))
                if not opcao:
                    settings.setSetting('external-sopcast', value='0')
                    sopcast_installed = True
                    mensagemok(translate(30000), translate(30099))
                else:
                    mensagemok(translate(30000), translate(30104))
                    if os.path.exists(os.path.join("sdcard", "Download")):
                        pasta = os.path.join("sdcard", "Download")
                        sopfile = os.path.join("sdcard", "Download",
                                               sopcast_apk.split("/")[-1])
                    else:
                        dialog = xbmcgui.Dialog()
                        pasta = dialog.browse(int(0), translate(30105),
                                              'videos')
                        sopfile = os.path.join(pasta,
                                               sopcast_apk.split("/")[-1])
                    download_tools().Downloader(sopcast_apk, sopfile,
                                                translate(30106),
                                                translate(30000))
                    if tarfile.is_tarfile(sopfile):
                        download_tools().extract(sopfile, pasta)
                        download_tools().remove(sopfile)
                    mensagemok(translate(30000), translate(30107), pasta,
                               translate(30108))
                    sopcast_installed = True
                    settings.setSetting('external-sopcast', value='1')
                    mensagemok(translate(30000), translate(30099))
                if latest_version:
                    settings.setSetting('sopcast_version',
                                        value=latest_version)
                return

        else:
            mensagemok(translate(30000), translate(30109))
            return
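
The "make this file executable" idiom repeated throughout the snippet above, shown once as a standalone helper (the helper name is illustrative):

import os
import stat

def make_executable(path):
    # Add the owner-execute bit while preserving the existing mode bits.
    st = os.stat(path)
    os.chmod(path, st.st_mode | stat.S_IEXEC)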
Ejemplo n.º 52
0
def maybe_download_and_extract(filename, working_directory, url_source, extract=False, expected_bytes=None):
    """Checks if file exists in working_directory otherwise tries to dowload the file,
    and optionally also tries to extract the file if format is ".zip" or ".tar"

    Parameters
    ----------
    filename : string
        The name of the (to be) dowloaded file.
    working_directory : string
        A folder path to search for the file in and dowload the file to
    url : string
        The URL to download the file from
    extract : bool, defaults to False
        If True, tries to uncompress the dowloaded file is ".tar.gz/.tar.bz2" or ".zip" file
    expected_bytes : int/None
        If set tries to verify that the downloaded file is of the specified size, otherwise raises an Exception,
        defaults to None which corresponds to no check being performed
    Returns
    ----------
    filepath to dowloaded (uncompressed) file

    Examples
    --------
    >>> down_file = tl.files.maybe_download_and_extract(filename = 'train-images-idx3-ubyte.gz',
                                                        working_directory = 'data/',
                                                        url_source = 'http://yann.lecun.com/exdb/mnist/')
    >>> tl.files.maybe_download_and_extract(filename = 'ADEChallengeData2016.zip',
                                            working_directory = 'data/',
                                            url_source = 'http://sceneparsing.csail.mit.edu/data/',
                                            extract=True)
    """
    # We first define a download function, supporting both Python 2 and 3.
    def _download(filename, working_directory, url_source):
        def _dlProgress(count, blockSize, totalSize):
            if(totalSize != 0):
                percent = float(count * blockSize) / float(totalSize) * 100.0
                sys.stdout.write("\r" "Downloading " + filename + "...%d%%" % percent)
                sys.stdout.flush()
        if sys.version_info[0] == 2:
            from urllib import urlretrieve
        else:
            from urllib.request import urlretrieve
        filepath = os.path.join(working_directory, filename)
        urlretrieve(url_source+filename, filepath, reporthook=_dlProgress)

    exists_or_mkdir(working_directory, verbose=False)
    filepath = os.path.join(working_directory, filename)

    if not os.path.exists(filepath):
        _download(filename, working_directory, url_source)
        print()
        statinfo = os.stat(filepath)
        print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
        if expected_bytes is not None and expected_bytes != statinfo.st_size:
            raise Exception('Failed to verify ' + filename + '. Can you get to it with a browser?')
        if extract:
            if tarfile.is_tarfile(filepath):
                print('Trying to extract tar file')
                with tarfile.open(filepath, 'r') as archive:
                    archive.extractall(working_directory)
                print('... Success!')
            elif zipfile.is_zipfile(filepath):
                print('Trying to extract zip file')
                with zipfile.ZipFile(filepath) as zf:
                    zf.extractall(working_directory)
                print('... Success!')
            else:
                print("Unknown compression_format only .tar.gz/.tar.bz2/.tar and .zip supported")
    return filepath
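A hedged usage sketch for the helper above, reusing the MNIST example already given in its docstring; the local 'data/' directory is an assumption, and since a bare ".gz" file is neither a tar nor a zip archive, extraction is left off here.

# Hypothetical call, mirroring the docstring example of maybe_download_and_extract.
mnist_path = maybe_download_and_extract(
    filename='train-images-idx3-ubyte.gz',
    working_directory='data/',
    url_source='http://yann.lecun.com/exdb/mnist/',
    extract=False,        # a plain .gz is neither a tarfile nor a zipfile, so extract would only print a warning
    expected_bytes=None,  # skip the size verification
)
print(mnist_path)         # e.g. data/train-images-idx3-ubyte.gz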
Ejemplo n.º 53
0
def cached_path(
    url_or_filename,
    download_config=None,
    **download_kwargs,
) -> Optional[str]:
    """
    Given something that might be a URL (or might be a local path),
    determine which. If it's a URL, download the file and cache it, and
    return the path to the cached file. If it's already a local path,
    make sure the file exists and then return the path.

    Return:
        Local path (string)

    Raises:
        FileNotFoundError: in case of non-recoverable file
            (non-existent or no cache on disk)
        ConnectionError: in case of unreachable url
            and no cache on disk
        ValueError: if it couldn't parse the url or filename correctly
    """
    if download_config is None:
        download_config = DownloadConfig(**download_kwargs)

    cache_dir = download_config.cache_dir or HF_DATASETS_CACHE
    if isinstance(cache_dir, Path):
        cache_dir = str(cache_dir)
    if isinstance(url_or_filename, Path):
        url_or_filename = str(url_or_filename)

    if is_remote_url(url_or_filename):
        # URL, so get it from the cache (downloading if necessary)
        output_path = get_from_cache(
            url_or_filename,
            cache_dir=cache_dir,
            force_download=download_config.force_download,
            proxies=download_config.proxies,
            resume_download=download_config.resume_download,
            user_agent=download_config.user_agent,
            local_files_only=download_config.local_files_only,
            use_etag=download_config.use_etag,
        )
    elif os.path.exists(url_or_filename):
        # File, and it exists.
        output_path = url_or_filename
    elif urlparse(url_or_filename).scheme == "":
        # File, but it doesn't exist.
        raise FileNotFoundError(
            "Local file {} doesn't exist".format(url_or_filename))
    else:
        # Something unknown
        raise ValueError(
            "unable to parse {} as a URL or as a local path".format(
                url_or_filename))

    if download_config.extract_compressed_file and output_path is not None:
        if not is_zipfile(output_path) and not tarfile.is_tarfile(
                output_path) and not is_gzip(output_path):
            return output_path

        # Path where we extract compressed archives
        # We extract in the cache dir, and get the extracted path name by hashing the original path
        abs_output_path = os.path.abspath(output_path)
        output_path_extracted = os.path.join(
            cache_dir, "extracted", hash_url_to_filename(abs_output_path))

        if (os.path.isdir(output_path_extracted)
                and os.listdir(output_path_extracted)
                and not download_config.force_extract) or (
                    os.path.isfile(output_path_extracted)
                    and not download_config.force_extract):
            return output_path_extracted

        # Prevent parallel extractions
        lock_path = output_path + ".lock"
        with FileLock(lock_path):
            shutil.rmtree(output_path_extracted, ignore_errors=True)
            os.makedirs(output_path_extracted, exist_ok=True)
            if tarfile.is_tarfile(output_path):
                tar_file = tarfile.open(output_path)
                tar_file.extractall(output_path_extracted)
                tar_file.close()
            elif is_gzip(output_path):
                os.rmdir(output_path_extracted)
                with gzip.open(output_path, "rb") as gzip_file:
                    with open(output_path_extracted, "wb") as extracted_file:
                        shutil.copyfileobj(gzip_file, extracted_file)
            elif is_zipfile(
                    output_path
            ):  # check the zip format last, because other archives can be wrongly detected as zip
                with ZipFile(output_path, "r") as zip_file:
                    zip_file.extractall(output_path_extracted)
            else:
                raise EnvironmentError(
                    "Archive format of {} could not be identified".format(
                        output_path))

        return output_path_extracted

    return output_path
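A hedged sketch of how cached_path might be called; the URL is a placeholder, and extract_compressed_file is assumed to be forwarded to DownloadConfig through **download_kwargs, as the function body above suggests.

# Hypothetical usage; the URL below is a placeholder, not a real dataset location.
local_file = cached_path("https://example.com/datasets/archive.tar.gz")

# With extraction enabled the function returns the directory the archive was
# unpacked into, i.e. <cache_dir>/extracted/<hash of the archive path>.
extracted_dir = cached_path(
    "https://example.com/datasets/archive.tar.gz",
    extract_compressed_file=True,
)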
Ejemplo n.º 54
0
def configure_acestream(latest_version):
    # Configuration for LINUX
    if xbmc.getCondVisibility(
            'system.platform.linux'
    ) and not xbmc.getCondVisibility('system.platform.Android'):
        print("Detected OS: Linux")
        if "arm" in os.uname()[4]:
            print("Linux Arm")
            if settings.getSetting('rpi2') == "true":
                ACE_KIT = os.path.join(addonpath, "resources", "binaries",
                                       "acestream_rpi.tar.gz")
                if tarfile.is_tarfile(ACE_KIT):
                    path_libraries = os.path.join(pastaperfil)
                    download_tools().extract(ACE_KIT, path_libraries)
                    xbmc.sleep(500)
                # set chroot to executable
                binary_path = os.path.join(pastaperfil, "acestream", "chroot")
                st = os.stat(binary_path)
                import stat
                os.chmod(binary_path, st.st_mode | stat.S_IEXEC)
                if latest_version:
                    settings.setSetting('acestream_version',
                                        value=latest_version)
                return

        elif os.uname()[4] == "x86_64":
            if settings.getSetting('openelecx86_64') == "true":
                print("OpenELEC x86_64 Acestream configuration")
                ACE_KIT = os.path.join(addonpath,
                                       openeelcx86_64_acestream.split("/")[-1])
                download_tools().Downloader(openeelcx86_64_acestream, ACE_KIT,
                                            translate(30110), translate(30000))
                if tarfile.is_tarfile(ACE_KIT):
                    download_tools().extract(ACE_KIT, pastaperfil)
                    xbmc.sleep(500)
                    download_tools().remove(ACE_KIT)
                if latest_version:
                    settings.setSetting('acestream_version',
                                        value=latest_version)
                return

            else:
                print("64 bit Linux Disto Acestream Configuration")
                ACE_KIT = os.path.join(
                    addonpath,
                    acestream_linux_x64_generic.split("/")[-1])
                download_tools().Downloader(acestream_linux_x64_generic,
                                            ACE_KIT, translate(30110),
                                            translate(30000))
                if tarfile.is_tarfile(ACE_KIT):
                    download_tools().extract(ACE_KIT, pastaperfil)
                    xbmc.sleep(500)
                    download_tools().remove(ACE_KIT)
                if latest_version:
                    settings.setSetting('acestream_version',
                                        value=latest_version)
                return

        elif os.uname()[4] == "i386" or os.uname()[4] == "i686":
            if settings.getSetting('openeleci386') == "true":
                print("32 bit Openelec Acestream Configuration")
                ACE_KIT = os.path.join(addonpath,
                                       openeelcxi386_acestream.split("/")[-1])
                download_tools().Downloader(openeelcxi386_acestream, ACE_KIT,
                                            translate(30110), translate(30000))
                if tarfile.is_tarfile(ACE_KIT):
                    download_tools().extract(ACE_KIT, pastaperfil)
                    xbmc.sleep(500)
                    download_tools().remove(ACE_KIT)
                if latest_version:
                    settings.setSetting('acestream_version',
                                        value=latest_version)
                return
            else:
                print("32 bit Linux general distro Acestream Configuration")
                ACE_KIT = os.path.join(
                    addonpath,
                    acestream_linux_i386_generic.split("/")[-1])
                download_tools().Downloader(acestream_linux_i386_generic,
                                            ACE_KIT, translate(30110),
                                            translate(30000))
                if tarfile.is_tarfile(ACE_KIT):
                    download_tools().extract(ACE_KIT, pastaperfil)
                    xbmc.sleep(500)
                    download_tools().remove(ACE_KIT)
                if latest_version:
                    settings.setSetting('acestream_version',
                                        value=latest_version)
                return

    elif xbmc.getCondVisibility('system.platform.windows'):
        print("Detected OS: Windows")
        if not os.path.exists(pastaperfil):
            xbmcvfs.mkdir(pastaperfil)
        # Ace
        SPSC_KIT = os.path.join(addonpath, acestream_windows.split("/")[-1])
        download_tools().Downloader(acestream_windows, SPSC_KIT,
                                    translate(30110), translate(30000))
        if os.path.exists(os.path.join(pastaperfil, "acestream")):
            shutil.rmtree(os.path.join(pastaperfil, "acestream"))
        if os.path.exists(os.path.join(pastaperfil, "player")):
            shutil.rmtree(os.path.join(pastaperfil, "player"))
        if tarfile.is_tarfile(SPSC_KIT):
            path_libraries = os.path.join(pastaperfil)
            download_tools().extract(SPSC_KIT, path_libraries)
            download_tools().remove(SPSC_KIT)
        if latest_version:
            settings.setSetting('acestream_version', value=latest_version)
        return

    elif xbmc.getCondVisibility('System.Platform.OSX'):
        print("Detected OS: Mac OSX")
        available = False
        if os.uname()[-1] == "x86_64":
            mac_package = osx_x64_acestream
            available = True
        elif os.uname()[-1] == "i386":
            mac_package = osx_i386_acestream
            available = True
        else:
            available = False
        if available:
            MAC_KIT = os.path.join('/Applications', mac_package.split("/")[-1])
            if not xbmcvfs.exists(
                    os.path.join('/Applications', 'Ace Stream.app')):
                download_tools().Downloader(mac_package, MAC_KIT,
                                            translate(30110), translate(30000))
                if xbmcvfs.exists(MAC_KIT):
                    xbmc.sleep(1000)
                    cmd = 'unzip /Applications/AceStreamWineOSX.zip'
                    zipa = subprocess.Popen(cmd, shell=True)
                    cmd = 'chmod -R 755 /Applications/Ace\ Stream.app'
                    print(cmd)
                    chmod = subprocess.Popen(cmd, shell=True)
                    try:
                        os.remove(MAC_KIT)
                    except:
                        pass
            if latest_version:
                settings.setSetting('acestream_version', value=latest_version)
            return
        else:
            mensagemok(translate(30000), translate(30100))
            return

    elif xbmc.getCondVisibility('System.Platform.Android'):

        print("Detected OS: Android")
        print("Starting Acestream Configuration")
        # acestream config for android
        if not os.path.exists(pastaperfil):
            xbmcvfs.mkdir(pastaperfil)
        # Hack to get xbmc app id
        xbmcfolder = xbmc.translatePath(addonpath).split("/")
        found = False
        if settings.getSetting('auto_appid') == 'true':
            i = 0
            sopcast_installed = False
            for folder in xbmcfolder:
                if folder.count('.') >= 2 and folder != addon_id:
                    found = True
                    break
                else:
                    i += 1
            if found:
                uid = os.getuid()
                app_id = xbmcfolder[i]
        else:
            if settings.getSetting('custom_appid') != '':
                uid = os.getuid()
                app_id = settings.getSetting('custom_appid')
                found = True

        if found:
            settings.setSetting('app_id', app_id)
            # Acestreamconfiguration for android starts here
            if "arm" in os.uname()[4]:
                acebundle = os.path.join(pastaperfil,
                                         android_aceengine_arm.split("/")[-1])
                download_tools().Downloader(android_aceengine_arm, acebundle,
                                            translate(30111), translate(30000))
            else:
                acebundle = os.path.join(pastaperfil,
                                         android_aceengine_x86.split("/")[-1])
                download_tools().Downloader(android_aceengine_x86, acebundle,
                                            translate(30111), translate(30000))
            if tarfile.is_tarfile(acebundle):
                download_tools().extract(acebundle, pastaperfil)
                download_tools().remove(acebundle)
            orgacestreamenginefolder = os.path.join(pastaperfil,
                                                    "org.acestream.engine")
            xbmc_data_path = os.path.join("/data", "data", app_id)
            if os.path.exists(xbmc_data_path) and uid == os.stat(
                    xbmc_data_path).st_uid:
                android_binary_dir = os.path.join(xbmc_data_path, "files",
                                                  "program.plexus")
                if not os.path.exists(android_binary_dir):
                    os.makedirs(android_binary_dir)
            android_acestream_folder = os.path.join(android_binary_dir,
                                                    "org.acestream.engine")
            if not os.path.exists(android_acestream_folder):
                os.makedirs(android_acestream_folder)
            else:
                # clean install for android - delete old folder
                print(android_acestream_folder)
                try:
                    os.system("chmod -R 777 " + android_acestream_folder +
                              "/*")
                    os.system("rm -r '" + android_acestream_folder + "'")
                except:
                    pass
                try:
                    os.makedirs(android_acestream_folder)
                except:
                    pass
            xbmc.sleep(200)
            # clean install in android - remove /sdcard/.ACEStream folder if it exists (to be enabled between versions if we need to remove older settings)
            # if os.path.exists(os.path.join('/sdcard','.ACEStream')):
            # 	try:
            # 		hidden_ace = os.path.join('/sdcard','.ACEStream')
            # 		os.system("chmod -R 777 "+hidden_ace+"/*")
            # 		os.system("rm -r '"+hidden_ace+"'")
            # 	except: pass
            recursive_overwrite(orgacestreamenginefolder,
                                android_acestream_folder,
                                ignore=None)
            pythonbin = os.path.join(android_acestream_folder, "files",
                                     "python", "bin", "python")
            st = os.stat(pythonbin)
            import stat
            os.chmod(pythonbin, st.st_mode | stat.S_IEXEC)
            if os.path.exists(orgacestreamenginefolder):
                try:
                    os.system("chmod -R 777 " + orgacestreamenginefolder +
                              "/*")
                    os.system("rm -r '" + orgacestreamenginefolder + "'")
                except:
                    pass
            try:
                xbmcvfs.mkdir(os.path.join('/sdcard', 'org.acestream.engine'))
            except:
                pass
            opcao = xbmcgui.Dialog().yesno(translate(30000), translate(30112),
                                           translate(30113))
            if not opcao:
                settings.setSetting('engine_app', '0')
            else:
                mensagemok(translate(30000), translate(30114),
                           translate(30115), translate(30116))
                if os.path.exists(os.path.join("sdcard", "Download")):
                    pasta = os.path.join("sdcard", "Download")
                    if "arm" in os.uname()[4]:
                        acefile = os.path.join(
                            "sdcard", "Download",
                            acestreamengine_apk_arm.split("/")[-1])
                    else:
                        acefile = os.path.join(
                            "sdcard", "Download",
                            acestreamengine_apk_x86.split("/")[-1])
                else:
                    dialog = xbmcgui.Dialog()
                    pasta = dialog.browse(int(0), translate(30105),
                                          'myprograms')
                    if "arm" in os.uname()[4]:
                        acefile = os.path.join(
                            pasta,
                            acestreamengine_apk_arm.split("/")[-1])
                    else:
                        acefile = os.path.join(
                            pasta,
                            acestreamengine_apk_x86.split("/")[-1])
                if "arm" in os.uname()[4]:
                    download_tools().Downloader(acestreamengine_apk_arm,
                                                acefile, translate(30117),
                                                translate(30000))
                else:
                    download_tools().Downloader(acestreamengine_apk_x86,
                                                acefile, translate(30117),
                                                translate(30000))
                if tarfile.is_tarfile(acefile):
                    download_tools().extract(acefile, pasta)
                    download_tools().remove(acefile)
                xbmc.sleep(2000)
                mensagemok(translate(30000), translate(30118), pasta,
                           translate(30108))
                mensagemok(translate(30000), translate(30119),
                           translate(30120), translate(30121))
                settings.setSetting('engine_app', '1')
            opcao = xbmcgui.Dialog().yesno(translate(30000), translate(30122),
                                           translate(30123))
            if opcao:
                if os.path.exists(os.path.join("sdcard", "Download")):
                    pasta = os.path.join("sdcard", "Download")
                    if "arm" in os.uname()[4]:
                        acefile = os.path.join(
                            "sdcard", "Download",
                            android_aceplayer_arm.split("/")[-1])
                    else:
                        os.path.join("sdcard", "Download",
                                     android_aceplayer_x86.split("/")[-1])
                else:
                    dialog = xbmcgui.Dialog()
                    pasta = dialog.browse(int(0), translate(30105),
                                          'myprograms')
                    if "arm" in os.uname()[4]:
                        acefile = os.path.join(
                            pasta,
                            android_aceplayer_arm.split("/")[-1])
                    else:
                        acefile = os.path.join(
                            pasta,
                            android_aceplayer_x86.split("/")[-1])
                if "arm" in os.uname()[4]:
                    download_tools().Downloader(android_aceplayer_arm, acefile,
                                                translate(30124),
                                                translate(30000))
                else:
                    download_tools().Downloader(android_aceplayer_x86, acefile,
                                                translate(30124),
                                                translate(30000))
                if tarfile.is_tarfile(acefile):
                    download_tools().extract(acefile, pasta)
                    download_tools().remove(acefile)
                xbmc.sleep(2000)
                mensagemok(translate(30000), translate(30125), pasta,
                           translate(30108))
                opcao = xbmcgui.Dialog().yesno(translate(30000),
                                               translate(30126))
                if opcao:
                    settings.setSetting('engine_app', '2')
            if latest_version:
                settings.setSetting('acestream_version', value=latest_version)
            mensagemok(translate(30000), translate(30127))
            return
        else:
            mensagemok(translate(30000), translate(30109))
            return
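Every platform branch in configure_acestream repeats the same pattern: download a kit, check that it really is a tar archive, extract it into the profile folder, and remove the download. Below is a compact, self-contained sketch of that pattern, with urlretrieve standing in for download_tools().Downloader and placeholder paths and URLs.

import os
import tarfile
from urllib.request import urlretrieve

def fetch_and_unpack(url, dest_dir):
    """Download an archive into dest_dir, extract it if it is a tarball, then delete it."""
    os.makedirs(dest_dir, exist_ok=True)
    kit_path = os.path.join(dest_dir, url.split("/")[-1])
    urlretrieve(url, kit_path)
    if tarfile.is_tarfile(kit_path):
        with tarfile.open(kit_path) as tar:
            tar.extractall(dest_dir)
    os.remove(kit_path)

# Placeholder URL and directory, for illustration only:
# fetch_and_unpack("https://example.com/acestream_rpi.tar.gz", "/tmp/profile")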
Ejemplo n.º 55
0
def _uncompress_file(file_, delete_archive=True, verbose=1):
    """Uncompress files contained in a data_set.

    Parameters
    ----------
    file: string
        path of file to be uncompressed.

    delete_archive: bool, optional
        Whether or not to delete the archive once it is uncompressed.
        Default: True

    verbose: int, optional
        verbosity level (0 means no message).

    Notes
    -----
    This handles zip, tar, gzip and bzip files only.
    """
    if verbose > 0:
        sys.stderr.write('Extracting data from %s...' % file_)
    data_dir = os.path.dirname(file_)
    # We first try to see if it is a zip file
    try:
        filename, ext = os.path.splitext(file_)
        with open(file_, "rb") as fd:
            header = fd.read(4)
        processed = False
        if zipfile.is_zipfile(file_):
            z = zipfile.ZipFile(file_)
            z.extractall(path=data_dir)
            z.close()
            if delete_archive:
                os.remove(file_)
            file_ = filename
            processed = True
        elif ext == '.gz' or header.startswith(b'\x1f\x8b'):
            import gzip
            if ext == '.tgz':
                filename = filename + '.tar'
            elif ext == '':
                # For gzip files, we rely on the assumption that there is an extension
                shutil.move(file_, file_ + '.gz')
                file_ = file_ + '.gz'
            gz = gzip.open(file_)
            out = open(filename, 'wb')
            shutil.copyfileobj(gz, out, 8192)
            gz.close()
            out.close()
            # If the file is .tar.gz, this will be handled in the next case
            if delete_archive:
                os.remove(file_)
            file_ = filename
            processed = True
        if os.path.isfile(file_) and tarfile.is_tarfile(file_):
            with contextlib.closing(tarfile.open(file_, "r")) as tar:
                tar.extractall(path=data_dir)
            if delete_archive:
                os.remove(file_)
            processed = True
        if not processed:
            raise IOError("[Uncompress] unknown archive file format: %s" %
                          file_)

        if verbose > 0:
            sys.stderr.write('.. done.\n')
    except Exception as e:
        if verbose > 0:
            print('Error uncompressing file: %s' % e)
        raise
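A hedged usage sketch for _uncompress_file; the path is a placeholder. Because the gzip branch runs before the tar check, a single call handles a .tar.gz archive end to end.

# Hypothetical call on a placeholder path. The .gz layer is stripped first,
# the resulting .tar is then detected and extracted next to the archive, and
# the intermediate files are removed because delete_archive defaults to True.
_uncompress_file('/tmp/data/dataset.tar.gz', delete_archive=True, verbose=1)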
Ejemplo n.º 56
0
def import_data_dj(
    in_path,
    group=None,
    ignore_unknown_nodes=False,
    extras_mode_existing='kcl',
    extras_mode_new='import',
    comment_mode='newest',
    silent=False
):
    """Import exported AiiDA archive to the AiiDA database and repository.

    Specific for the Django backend.
    If ``in_path`` is a folder, calls extract_tree; otherwise, tries to detect the compression format
    (zip, tar.gz, tar.bz2, ...) and calls the correct function.

    :param in_path: the path to a file or folder that can be imported in AiiDA.
    :type in_path: str

    :param group: Group wherein all imported Nodes will be placed.
    :type group: :py:class:`~aiida.orm.groups.Group`

    :param extras_mode_existing: 3 letter code that will identify what to do with the extras import.
        The first letter acts on extras that are present in the original node and not present in the imported node.
        Can be either:
        'k' (keep it) or
        'n' (do not keep it).
        The second letter acts on the imported extras that are not present in the original node.
        Can be either:
        'c' (create it) or
        'n' (do not create it).
        The third letter defines what to do in case of a name collision.
        Can be either:
        'l' (leave the old value),
        'u' (update with a new value),
        'd' (delete the extra), or
        'a' (ask what to do if the content is different).
    :type extras_mode_existing: str

    :param extras_mode_new: 'import' to import extras of new nodes or 'none' to ignore them.
    :type extras_mode_new: str

    :param comment_mode: Comment import modes (when same UUIDs are found).
        Can be either:
        'newest' (will keep the Comment with the most recent modification time (mtime)) or
        'overwrite' (will overwrite existing Comments with the ones from the import file).
    :type comment_mode: str

    :param silent: suppress prints.
    :type silent: bool

    :return: New and existing Nodes and Links.
    :rtype: dict

    :raises `~aiida.tools.importexport.common.exceptions.ImportValidationError`: if parameters or the contents of
        `metadata.json` or `data.json` can not be validated.
    :raises `~aiida.tools.importexport.common.exceptions.CorruptArchive`: if the provided archive at ``in_path`` is
        corrupted.
    :raises `~aiida.tools.importexport.common.exceptions.IncompatibleArchiveVersionError`: if the provided archive's
        export version is not equal to the export version of AiiDA at the moment of import.
    :raises `~aiida.tools.importexport.common.exceptions.ArchiveImportError`: if there are any internal errors when
        importing.
    :raises `~aiida.tools.importexport.common.exceptions.ImportUniquenessError`: if a new unique entity can not be
        created.
    """
    from django.db import transaction  # pylint: disable=import-error,no-name-in-module
    from aiida.backends.djsite.db import models

    # This is the export version expected by this function
    expected_export_version = StrictVersion(EXPORT_VERSION)

    # The returned dictionary with new and existing nodes and links
    ret_dict = {}

    # Initial check(s)
    if group:
        if not isinstance(group, Group):
            raise exceptions.ImportValidationError('group must be a Group entity')
        elif not group.is_stored:
            group.store()

    ################
    # EXTRACT DATA #
    ################
    # The sandbox has to remain open until the end
    with SandboxFolder() as folder:
        if os.path.isdir(in_path):
            extract_tree(in_path, folder)
        else:
            if tarfile.is_tarfile(in_path):
                extract_tar(in_path, folder, silent=silent, nodes_export_subfolder=NODES_EXPORT_SUBFOLDER)
            elif zipfile.is_zipfile(in_path):
                try:
                    extract_zip(in_path, folder, silent=silent, nodes_export_subfolder=NODES_EXPORT_SUBFOLDER)
                except ValueError as exc:
                    print('The following problem occurred while processing the provided file: {}'.format(exc))
                    return
            else:
                raise exceptions.ImportValidationError(
                    'Unable to detect the input file format, it is neither a '
                    '(possibly compressed) tar file, nor a zip file.'
                )

        if not folder.get_content_list():
            raise exceptions.CorruptArchive('The provided file/folder ({}) is empty'.format(in_path))
        try:
            with io.open(folder.get_abs_path('metadata.json'), 'r', encoding='utf8') as fhandle:
                metadata = json.load(fhandle)

            with io.open(folder.get_abs_path('data.json'), 'r', encoding='utf8') as fhandle:
                data = json.load(fhandle)
        except IOError as error:
            raise exceptions.CorruptArchive(
                'Unable to find the file {} in the import file or folder'.format(error.filename)
            )

        ######################
        # PRELIMINARY CHECKS #
        ######################
        export_version = StrictVersion(str(metadata['export_version']))
        if export_version != expected_export_version:
            msg = 'Export file version is {}, can import only version {}'\
                    .format(metadata['export_version'], expected_export_version)
            if export_version < expected_export_version:
                msg += "\nUse 'verdi export migrate' to update this export file."
            else:
                msg += '\nUpdate your AiiDA version in order to import this file.'

            raise exceptions.IncompatibleArchiveVersionError(msg)

        ##########################################################################
        # CREATE UUID REVERSE TABLES AND CHECK IF I HAVE ALL NODES FOR THE LINKS #
        ##########################################################################
        linked_nodes = set(chain.from_iterable((l['input'], l['output']) for l in data['links_uuid']))
        group_nodes = set(chain.from_iterable(six.itervalues(data['groups_uuid'])))

        if NODE_ENTITY_NAME in data['export_data']:
            import_nodes_uuid = set(v['uuid'] for v in data['export_data'][NODE_ENTITY_NAME].values())
        else:
            import_nodes_uuid = set()

        # the combined set of linked_nodes and group_nodes was obtained from looking at all the links
        # the set of import_nodes_uuid was received from the stuff actually referred to in export_data
        unknown_nodes = linked_nodes.union(group_nodes) - import_nodes_uuid

        if unknown_nodes and not ignore_unknown_nodes:
            raise exceptions.DanglingLinkError(
                'The import file refers to {} nodes with unknown UUID, therefore it cannot be imported. Either first '
                'import the unknown nodes, or export also the parents when exporting. The unknown UUIDs are:\n'
                ''.format(len(unknown_nodes)) + '\n'.join('* {}'.format(uuid) for uuid in unknown_nodes)
            )

        ###################################
        # DOUBLE-CHECK MODEL DEPENDENCIES #
        ###################################
        # The entity import order. It is defined by the database model relationships.

        model_order = (
            USER_ENTITY_NAME, COMPUTER_ENTITY_NAME, NODE_ENTITY_NAME, GROUP_ENTITY_NAME, LOG_ENTITY_NAME,
            COMMENT_ENTITY_NAME
        )

        for import_field_name in metadata['all_fields_info']:
            if import_field_name not in model_order:
                raise exceptions.ImportValidationError(
                    "You are trying to import an unknown model '{}'!".format(import_field_name)
                )

        for idx, model_name in enumerate(model_order):
            dependencies = []
            for field in metadata['all_fields_info'][model_name].values():
                try:
                    dependencies.append(field['requires'])
                except KeyError:
                    # (No ForeignKey)
                    pass
            for dependency in dependencies:
                if dependency not in model_order[:idx]:
                    raise exceptions.ArchiveImportError(
                        'Model {} requires {} but would be loaded first; stopping...'.format(model_name, dependency)
                    )

        ###################################################
        # CREATE IMPORT DATA DIRECT UNIQUE_FIELD MAPPINGS #
        ###################################################
        import_unique_ids_mappings = {}
        for model_name, import_data in data['export_data'].items():
            if model_name in metadata['unique_identifiers']:
                # I have to reconvert the pk to integer
                import_unique_ids_mappings[model_name] = {
                    int(k): v[metadata['unique_identifiers'][model_name]] for k, v in import_data.items()
                }

        ###############
        # IMPORT DATA #
        ###############
        # DO ALL WITH A TRANSACTION
        with transaction.atomic():
            foreign_ids_reverse_mappings = {}
            new_entries = {}
            existing_entries = {}

            # I first generate the list of data
            for model_name in model_order:
                cls_signature = entity_names_to_signatures[model_name]
                model = get_object_from_string(cls_signature)
                fields_info = metadata['all_fields_info'].get(model_name, {})
                unique_identifier = metadata['unique_identifiers'].get(model_name, None)

                new_entries[model_name] = {}
                existing_entries[model_name] = {}

                foreign_ids_reverse_mappings[model_name] = {}

                # Not necessarily all models are exported
                if model_name in data['export_data']:

                    # skip nodes that are already present in the DB
                    if unique_identifier is not None:
                        import_unique_ids = set(v[unique_identifier] for v in data['export_data'][model_name].values())

                        relevant_db_entries_result = model.objects.filter(
                            **{'{}__in'.format(unique_identifier): import_unique_ids}
                        )
                        # Note: uuids need to be converted to strings
                        relevant_db_entries = {
                            str(getattr(n, unique_identifier)): n for n in relevant_db_entries_result
                        }

                        foreign_ids_reverse_mappings[model_name] = {k: v.pk for k, v in relevant_db_entries.items()}
                        for key, value in data['export_data'][model_name].items():
                            if value[unique_identifier] in relevant_db_entries.keys():
                                # Already in DB
                                existing_entries[model_name][key] = value
                            else:
                                # To be added
                                new_entries[model_name][key] = value
                    else:
                        new_entries[model_name] = data['export_data'][model_name].copy()

            # Show Comment mode if not silent
            if not silent:
                print('Comment mode: {}'.format(comment_mode))

            # I import data from the given model
            for model_name in model_order:
                cls_signature = entity_names_to_signatures[model_name]
                model = get_object_from_string(cls_signature)
                fields_info = metadata['all_fields_info'].get(model_name, {})
                unique_identifier = metadata['unique_identifiers'].get(model_name, None)

                # EXISTING ENTRIES
                for import_entry_pk, entry_data in existing_entries[model_name].items():
                    unique_id = entry_data[unique_identifier]
                    existing_entry_id = foreign_ids_reverse_mappings[model_name][unique_id]
                    import_data = dict(
                        deserialize_field(
                            k,
                            v,
                            fields_info=fields_info,
                            import_unique_ids_mappings=import_unique_ids_mappings,
                            foreign_ids_reverse_mappings=foreign_ids_reverse_mappings
                        ) for k, v in entry_data.items()
                    )
                    # TODO COMPARE, AND COMPARE ATTRIBUTES

                    if model is models.DbComment:
                        new_entry_uuid = merge_comment(import_data, comment_mode)
                        if new_entry_uuid is not None:
                            entry_data[unique_identifier] = new_entry_uuid
                            new_entries[model_name][import_entry_pk] = entry_data

                    if model_name not in ret_dict:
                        ret_dict[model_name] = {'new': [], 'existing': []}
                    ret_dict[model_name]['existing'].append((import_entry_pk, existing_entry_id))
                    if not silent:
                        print('existing %s: %s (%s->%s)' % (model_name, unique_id, import_entry_pk, existing_entry_id))
                        # print("  `-> WARNING: NO DUPLICITY CHECK DONE!")
                        # CHECK ALSO FILES!

                # Store all objects for this model in a list, and store them all in once at the end.
                objects_to_create = []
                # This is needed later to associate the import entry with the new pk
                import_new_entry_pks = {}
                imported_comp_names = set()

                # NEW ENTRIES
                for import_entry_pk, entry_data in new_entries[model_name].items():
                    unique_id = entry_data[unique_identifier]
                    import_data = dict(
                        deserialize_field(
                            k,
                            v,
                            fields_info=fields_info,
                            import_unique_ids_mappings=import_unique_ids_mappings,
                            foreign_ids_reverse_mappings=foreign_ids_reverse_mappings
                        ) for k, v in entry_data.items()
                    )

                    if model is models.DbGroup:
                        # Check if there is already a group with the same name
                        dupl_counter = 0
                        orig_label = import_data['label']
                        while model.objects.filter(label=import_data['label']):
                            import_data['label'] = orig_label + DUPL_SUFFIX.format(dupl_counter)
                            dupl_counter += 1
                            if dupl_counter == 100:
                                raise exceptions.ImportUniquenessError(
                                    'A group of that label ( {} ) already exists and I could not create a new one'
                                    ''.format(orig_label)
                                )

                    elif model is models.DbComputer:
                        # Check if there is already a computer with the same name in the database
                        dupl = (
                            model.objects.filter(name=import_data['name']) or import_data['name'] in imported_comp_names
                        )
                        orig_name = import_data['name']
                        dupl_counter = 0
                        while dupl:
                            # Rename the new computer
                            import_data['name'] = (orig_name + DUPL_SUFFIX.format(dupl_counter))
                            dupl = (
                                model.objects.filter(name=import_data['name']) or
                                import_data['name'] in imported_comp_names
                            )
                            dupl_counter += 1
                            if dupl_counter == 100:
                                raise exceptions.ImportUniquenessError(
                                    'A computer of that name ( {} ) already exists and I could not create a new one'
                                    ''.format(orig_name)
                                )

                        imported_comp_names.add(import_data['name'])

                    objects_to_create.append(model(**import_data))
                    import_new_entry_pks[unique_id] = import_entry_pk

                if model_name == NODE_ENTITY_NAME:
                    if not silent:
                        print('STORING NEW NODE REPOSITORY FILES...')

                    # NEW NODES
                    for object_ in objects_to_create:
                        import_entry_uuid = object_.uuid
                        import_entry_pk = import_new_entry_pks[import_entry_uuid]

                        # Before storing entries in the DB, I store the files (if these are nodes).
                        # Note: only for new entries!
                        subfolder = folder.get_subfolder(
                            os.path.join(NODES_EXPORT_SUBFOLDER, export_shard_uuid(import_entry_uuid))
                        )
                        if not subfolder.exists():
                            raise exceptions.CorruptArchive(
                                'Unable to find the repository folder for Node with UUID={} in the exported '
                                'file'.format(import_entry_uuid)
                            )
                        destdir = RepositoryFolder(section=Repository._section_name, uuid=import_entry_uuid)
                        # Replace the folder, possibly destroying existing previous folders, and move the files
                        # (faster if we are on the same filesystem, and in any case the source is a SandboxFolder)
                        destdir.replace_with_folder(subfolder.abspath, move=True, overwrite=True)

                        # For DbNodes, we also have to store its attributes
                        if not silent:
                            print('STORING NEW NODE ATTRIBUTES...')

                        # Get attributes from import file
                        try:
                            object_.attributes = data['node_attributes'][str(import_entry_pk)]
                        except KeyError:
                            raise exceptions.CorruptArchive(
                                'Unable to find attribute info for Node with UUID={}'.format(import_entry_uuid)
                            )

                        # For DbNodes, we also have to store its extras
                        if extras_mode_new == 'import':
                            if not silent:
                                print('STORING NEW NODE EXTRAS...')

                            # Get extras from import file
                            try:
                                extras = data['node_extras'][str(import_entry_pk)]
                            except KeyError:
                                raise exceptions.CorruptArchive(
                                    'Unable to find extra info for Node with UUID={}'.format(import_entry_uuid)
                                )
                            # TODO: remove when aiida extras will be moved somewhere else
                            # from here
                            extras = {key: value for key, value in extras.items() if not key.startswith('_aiida_')}
                            if object_.node_type.endswith('code.Code.'):
                                extras = {key: value for key, value in extras.items() if not key == 'hidden'}
                            # till here
                            object_.extras = extras
                        elif extras_mode_new == 'none':
                            if not silent:
                                print('SKIPPING NEW NODE EXTRAS...')
                        else:
                            raise exceptions.ImportValidationError(
                                "Unknown extras_mode_new value: {}, should be either 'import' or 'none'"
                                ''.format(extras_mode_new)
                            )

                    # EXISTING NODES (Extras)
                    # For the existing nodes that are also in the imported list we also update their extras if necessary
                    if not silent:
                        print('UPDATING EXISTING NODE EXTRAS (mode: {})'.format(extras_mode_existing))

                    import_existing_entry_pks = {
                        entry_data[unique_identifier]: import_entry_pk
                        for import_entry_pk, entry_data in existing_entries[model_name].items()
                    }
                    for node in models.DbNode.objects.filter(uuid__in=import_existing_entry_pks).all():  # pylint: disable=no-member
                        import_entry_uuid = str(node.uuid)
                        import_entry_pk = import_existing_entry_pks[import_entry_uuid]

                        # Get extras from import file
                        try:
                            extras = data['node_extras'][str(import_entry_pk)]
                        except KeyError:
                            raise exceptions.CorruptArchive(
                                'Unable to find extra info for Node with UUID={}'.format(import_entry_uuid)
                            )

                        # TODO: remove when aiida extras will be moved somewhere else
                        # from here
                        extras = {key: value for key, value in extras.items() if not key.startswith('_aiida_')}
                        if node.node_type.endswith('code.Code.'):
                            extras = {key: value for key, value in extras.items() if not key == 'hidden'}
                        # till here
                        node.extras = merge_extras(node.extras, extras, extras_mode_existing)

                        # Already saving existing node here to update its extras
                        node.save()

                # If there is an mtime in the field, disable the automatic update
                # to keep the mtime that we have set here
                if 'mtime' in [field.name for field in model._meta.local_fields]:
                    with models.suppress_auto_now([(model, ['mtime'])]):
                        # Store them all in once; however, the PK are not set in this way...
                        model.objects.bulk_create(objects_to_create)
                else:
                    model.objects.bulk_create(objects_to_create)

                # Get back the just-saved entries
                just_saved_queryset = model.objects.filter(
                    **{
                        '{}__in'.format(unique_identifier): import_new_entry_pks.keys()
                    }
                ).values_list(unique_identifier, 'pk')
                # note: convert uuids from type UUID to strings
                just_saved = {str(key): value for key, value in just_saved_queryset}

                # Now I have the PKs, print the info
                # Moreover, add newly created Nodes to foreign_ids_reverse_mappings
                for unique_id, new_pk in just_saved.items():
                    import_entry_pk = import_new_entry_pks[unique_id]
                    foreign_ids_reverse_mappings[model_name][unique_id] = new_pk
                    if model_name not in ret_dict:
                        ret_dict[model_name] = {'new': [], 'existing': []}
                    ret_dict[model_name]['new'].append((import_entry_pk, new_pk))

                    if not silent:
                        print('NEW %s: %s (%s->%s)' % (model_name, unique_id, import_entry_pk, new_pk))

            if not silent:
                print('STORING NODE LINKS...')
            import_links = data['links_uuid']
            links_to_store = []

            # Needed, since QueryBuilder does not yet work for recently saved Nodes
            existing_links_raw = models.DbLink.objects.all().values_list('input', 'output', 'label', 'type')
            existing_links = {(l[0], l[1], l[2], l[3]) for l in existing_links_raw}
            existing_outgoing_unique = {(l[0], l[3]) for l in existing_links_raw}
            existing_outgoing_unique_pair = {(l[0], l[2], l[3]) for l in existing_links_raw}
            existing_incoming_unique = {(l[1], l[3]) for l in existing_links_raw}
            existing_incoming_unique_pair = {(l[1], l[2], l[3]) for l in existing_links_raw}

            calculation_node_types = 'process.calculation.'
            workflow_node_types = 'process.workflow.'
            data_node_types = 'data.'

            link_mapping = {
                LinkType.CALL_CALC: (workflow_node_types, calculation_node_types, 'unique_triple', 'unique'),
                LinkType.CALL_WORK: (workflow_node_types, workflow_node_types, 'unique_triple', 'unique'),
                LinkType.CREATE: (calculation_node_types, data_node_types, 'unique_pair', 'unique'),
                LinkType.INPUT_CALC: (data_node_types, calculation_node_types, 'unique_triple', 'unique_pair'),
                LinkType.INPUT_WORK: (data_node_types, workflow_node_types, 'unique_triple', 'unique_pair'),
                LinkType.RETURN: (workflow_node_types, data_node_types, 'unique_pair', 'unique_triple'),
            }

            for link in import_links:
                # Check for dangling Links within the supposedly self-consistent archive
                try:
                    in_id = foreign_ids_reverse_mappings[NODE_ENTITY_NAME][link['input']]
                    out_id = foreign_ids_reverse_mappings[NODE_ENTITY_NAME][link['output']]
                except KeyError:
                    if ignore_unknown_nodes:
                        continue
                    else:
                        raise exceptions.ImportValidationError(
                            'Trying to create a link with one or both unknown nodes, stopping (in_uuid={}, '
                            'out_uuid={}, label={}, type={})'.format(
                                link['input'], link['output'], link['label'], link['type']
                            )
                        )

                # Check if link already exists, skip if it does
                # This is equivalent to an existing triple link (i.e. unique_triple from below)
                if (in_id, out_id, link['label'], link['type']) in existing_links:
                    continue

                # Since backend specific Links (DbLink) are not validated upon creation, we will now validate them.
                try:
                    validate_link_label(link['label'])
                except ValueError as why:
                    raise exceptions.ImportValidationError('Error during Link label validation: {}'.format(why))

                source = models.DbNode.objects.get(id=in_id)
                target = models.DbNode.objects.get(id=out_id)

                if source.uuid == target.uuid:
                    raise exceptions.ImportValidationError('Cannot add a link to oneself')

                link_type = LinkType(link['type'])
                type_source, type_target, outdegree, indegree = link_mapping[link_type]

                # Check if source Node is a valid type
                if not source.node_type.startswith(type_source):
                    raise exceptions.ImportValidationError(
                        'Cannot add a {} link from {} to {}'.format(link_type, source.node_type, target.node_type)
                    )

                # Check if target Node is a valid type
                if not target.node_type.startswith(type_target):
                    raise exceptions.ImportValidationError(
                        'Cannot add a {} link from {} to {}'.format(link_type, source.node_type, target.node_type)
                    )

                # If the outdegree is `unique` there cannot already be any other outgoing link of that type,
                # i.e., the source Node may not have a LinkType of current LinkType, going out, existing already.
                if outdegree == 'unique' and (in_id, link['type']) in existing_outgoing_unique:
                    raise exceptions.ImportValidationError(
                        'Node<{}> already has an outgoing {} link'.format(source.uuid, link_type)
                    )

                # If the outdegree is `unique_pair`,
                # then the link labels for outgoing links of this type should be unique,
                # i.e., the source Node may not have a LinkType of current LinkType, going out,
                # that also has the current Link label, existing already.
                elif outdegree == 'unique_pair' and \
                (in_id, link['label'], link['type']) in existing_outgoing_unique_pair:
                    raise exceptions.ImportValidationError(
                        'Node<{}> already has an outgoing {} link with label "{}"'.format(
                            source.uuid, link_type, link['label']
                        )
                    )

                # If the indegree is `unique` there cannot already be any other incoming links of that type,
                # i.e., the target Node may not have a LinkType of current LinkType, coming in, existing already.
                if indegree == 'unique' and (out_id, link['type']) in existing_incoming_unique:
                    raise exceptions.ImportValidationError(
                        'Node<{}> already has an incoming {} link'.format(target.uuid, link_type)
                    )

                # If the indegree is `unique_pair`,
                # then the link labels for incoming links of this type should be unique,
                # i.e., the target Node may not have a LinkType of current LinkType, coming in
                # that also has the current Link label, existing already.
                elif indegree == 'unique_pair' and \
                (out_id, link['label'], link['type']) in existing_incoming_unique_pair:
                    raise exceptions.ImportValidationError(
                        'Node<{}> already has an incoming {} link with label "{}"'.format(
                            target.uuid, link_type, link['label']
                        )
                    )

                # New link
                links_to_store.append(
                    models.DbLink(input_id=in_id, output_id=out_id, label=link['label'], type=link['type'])
                )
                if 'Link' not in ret_dict:
                    ret_dict['Link'] = {'new': []}
                ret_dict['Link']['new'].append((in_id, out_id))

                # Add new Link to sets of existing Links 'input PK', 'output PK', 'label', 'type'
                existing_links.add((in_id, out_id, link['label'], link['type']))
                existing_outgoing_unique.add((in_id, link['type']))
                existing_outgoing_unique_pair.add((in_id, link['label'], link['type']))
                existing_incoming_unique.add((out_id, link['type']))
                existing_incoming_unique_pair.add((out_id, link['label'], link['type']))

            # Store new links
            if links_to_store:
                if not silent:
                    print('   ({} new links...)'.format(len(links_to_store)))

                models.DbLink.objects.bulk_create(links_to_store)
            else:
                if not silent:
                    print('   (0 new links...)')

            if not silent:
                print('STORING GROUP ELEMENTS...')
            import_groups = data['groups_uuid']
            for groupuuid, groupnodes in import_groups.items():
                # TODO: cache these to avoid too many queries
                group_ = models.DbGroup.objects.get(uuid=groupuuid)
                nodes_to_store = [foreign_ids_reverse_mappings[NODE_ENTITY_NAME][node_uuid] for node_uuid in groupnodes]
                if nodes_to_store:
                    group_.dbnodes.add(*nodes_to_store)

        ######################################################
        # Put everything in a specific group
        ######################################################
        existing = existing_entries.get(NODE_ENTITY_NAME, {})
        existing_pk = [foreign_ids_reverse_mappings[NODE_ENTITY_NAME][v['uuid']] for v in six.itervalues(existing)]
        new = new_entries.get(NODE_ENTITY_NAME, {})
        new_pk = [foreign_ids_reverse_mappings[NODE_ENTITY_NAME][v['uuid']] for v in six.itervalues(new)]

        pks_for_group = existing_pk + new_pk

        # So that we do not create empty groups
        if pks_for_group:
            # If the user specified a group, import everything into it; otherwise create a new import group
            if not group:
                # Get a unique name for the import group, based on the current (local) time
                basename = timezone.localtime(timezone.now()).strftime('%Y%m%d-%H%M%S')
                counter = 0
                group_label = basename

                while Group.objects.find(filters={'label': group_label}):
                    counter += 1
                    group_label = '{}_{}'.format(basename, counter)

                    if counter == 100:
                        raise exceptions.ImportUniquenessError(
                            "Overflow of import groups (more than 100 import groups exists with basename '{}')"
                            ''.format(basename)
                        )
                group = Group(label=group_label, type_string=IMPORTGROUP_TYPE).store()

            # Add all the nodes to the new group
            # TODO: decide if we want to return the group label
            nodes = [entry[0] for entry in QueryBuilder().append(Node, filters={'id': {'in': pks_for_group}}).all()]
            group.add_nodes(nodes)

            if not silent:
                print("IMPORTED NODES ARE GROUPED IN THE IMPORT GROUP LABELED '{}'".format(group.label))
        else:
            if not silent:
                print('NO NODES TO IMPORT, SO NO GROUP CREATED, IF IT DID NOT ALREADY EXIST')

    if not silent:
        print('*** WARNING: MISSING EXISTING UUID CHECKS!!')
        print('*** WARNING: TODO: UPDATE IMPORT_DATA WITH DEFAULT VALUES! (e.g. calc status, user pwd, ...)')
        print('DONE.')

    return ret_dict
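The import routine above enforces the outdegree/indegree constraints with in-memory "seen" sets, so each uniqueness rule is a constant-time membership test rather than a database query, and every accepted link is added to those sets before the bulk insert. A minimal standalone sketch of that pattern follows; validate_links and its arguments are illustrative names, not part of the AiiDA API.

def validate_links(links, outdegree='unique_pair', indegree='unique'):
    """Raise ValueError when a (source, target, label, type) link violates the degree constraints."""
    existing_outgoing_unique = set()        # (source, type)
    existing_outgoing_unique_pair = set()   # (source, label, type)
    existing_incoming_unique = set()        # (target, type)
    existing_incoming_unique_pair = set()   # (target, label, type)

    for source, target, label, link_type in links:
        if outdegree == 'unique' and (source, link_type) in existing_outgoing_unique:
            raise ValueError('{} already has an outgoing {} link'.format(source, link_type))
        if outdegree == 'unique_pair' and (source, label, link_type) in existing_outgoing_unique_pair:
            raise ValueError('{} already has an outgoing {} link labeled "{}"'.format(source, link_type, label))
        if indegree == 'unique' and (target, link_type) in existing_incoming_unique:
            raise ValueError('{} already has an incoming {} link'.format(target, link_type))
        if indegree == 'unique_pair' and (target, label, link_type) in existing_incoming_unique_pair:
            raise ValueError('{} already has an incoming {} link labeled "{}"'.format(target, link_type, label))

        # Record the accepted link so the remaining links are checked against it as well
        existing_outgoing_unique.add((source, link_type))
        existing_outgoing_unique_pair.add((source, label, link_type))
        existing_incoming_unique.add((target, link_type))
        existing_incoming_unique_pair.add((target, label, link_type))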
Ejemplo n.º 57
0
def extract_file(filename, extract, destination, silent=False):
    try:
        if tarfile.is_tarfile(filename):
            if silent == False:
                extract_dlg = xbmcgui.DialogProgress()
                extract_dlg.create('OpenELEC ',
                                   _(32186).encode('utf-8'), ' ', ' ')
                extract_dlg.update(0)
            compressed = tarfile.open(filename)
            if silent == False:
                xbmc.executebuiltin('ActivateWindow(busydialog)')
            names = compressed.getnames()
            if silent == False:
                xbmc.executebuiltin('Dialog.Close(busydialog)')
            for name in names:
                for search in extract:
                    if search in name:
                        fileinfo = compressed.getmember(name)
                        response = compressed.extractfile(fileinfo)
                        local_file = open(destination + name.rsplit('/', 1)[1],
                                          'wb')
                        total_size = fileinfo.size
                        minutes = 0
                        seconds = 0
                        rest = 1
                        speed = 1
                        start = time.time()
                        size = 1
                        part_size = 1
                        last_percent = 0
                        while 1:
                            part = response.read(32768)
                            part_size += len(part)
                            if silent == False:
                                if extract_dlg.iscanceled():
                                    local_file.close()
                                    response.close()
                                    return None
                            if not part or xbmc.abortRequested:
                                break
                            if time.time() > start + 2:
                                # Clamp the speed to at least 1 KB/s so the ETA math below never divides by zero
                                speed = max(1, int((part_size - size) /
                                                   (time.time() - start) / 1024))
                                start = time.time()
                                size = part_size
                                rest = total_size - part_size
                                minutes = rest / 1024 / speed / 60
                                seconds = rest / 1024 / speed - minutes * 60
                            percent = int(part_size * 100.0 / total_size)
                            if silent == False:
                                extract_dlg.update(
                                    percent,
                                    _(32184) +
                                    ':  %s' % name.rsplit('/', 1)[1],
                                    _(32185) + ':  %d KB/s' % speed,
                                    _(32183) + ':  %d m %d s' %
                                    (minutes, seconds))
                                if extract_dlg.iscanceled():
                                    local_file.close()
                                    response.close()
                                    return None
                            else:
                                if percent > last_percent + 5:
                                    dbg_log(
                                        'oe::extract_file(' + destination +
                                        name.rsplit('/', 1)[1] + ')',
                                        '%d percent with %d KB/s' %
                                        (percent, speed))
                                    last_percent = percent
                            local_file.write(part)
                        local_file.close()
                        response.close()
        return 1
    except Exception, e:
        dbg_log('oe::extract_file', 'ERROR: (' + repr(e) + ')')
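extract_file above streams each matching member out of the archive in 32 KB chunks so it can update the Kodi progress dialog as it goes. A simplified sketch of the same chunked extraction without the Kodi/XBMC dependencies is shown below; extract_matching and its parameters are illustrative assumptions, not part of the OpenELEC add-on.

import os
import tarfile

def extract_matching(archive_path, patterns, destination, chunk_size=32768):
    """Extract tar members whose names contain any of the given substrings,
    reading each member in chunks so progress can be reported per chunk."""
    with tarfile.open(archive_path) as archive:
        for member in archive.getmembers():
            if not any(pattern in member.name for pattern in patterns):
                continue
            source = archive.extractfile(member)
            if source is None:
                # Directories and links have no file object; skip them
                continue
            target_path = os.path.join(destination, os.path.basename(member.name))
            copied = 0
            with open(target_path, 'wb') as target:
                while True:
                    chunk = source.read(chunk_size)
                    if not chunk:
                        break
                    target.write(chunk)
                    copied += len(chunk)
                    # A progress callback could use copied and member.size here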
Ejemplo n.º 58
0
    def tar_files(self, aTarFileBytes, formatt):
        "When called will unpack and edit a Tar File and return a tar file"

        print "[*] TarFile size:", len(aTarFileBytes) / 1024, 'KB'

        if len(aTarFileBytes) > int(self.userConfig['TAR']['maxSize']):
            print "[!] TarFile over allowed size"
            mitmf_logger.info("TarFIle maxSize met {}".format(
                len(aTarFileBytes)))
            self.patched.put(aTarFileBytes)
            return

        with tempfile.NamedTemporaryFile() as tarFileStorage:
            tarFileStorage.write(aTarFileBytes)
            tarFileStorage.flush()

            if not tarfile.is_tarfile(tarFileStorage.name):
                print '[!] Not a tar file'
                self.patched.put(aTarFileBytes)
                return

            compressionMode = ':'
            if formatt == 'gz':
                compressionMode = ':gz'
            if formatt == 'bz':
                compressionMode = ':bz2'

            tarFile = None
            try:
                tarFileStorage.seek(0)
                tarFile = tarfile.open(fileobj=tarFileStorage,
                                       mode='r' + compressionMode)
            except tarfile.ReadError:
                pass

            if tarFile is None:
                print '[!] Not a tar file'
                self.patched.put(aTarFileBytes)
                return

            print '[*] Tar file contents and info:'
            print '[*] Compression:', formatt

            members = tarFile.getmembers()
            for info in members:
                print "\t", info.name, info.mtime, info.size

            newTarFileStorage = tempfile.NamedTemporaryFile()
            newTarFile = tarfile.open(mode='w' + compressionMode,
                                      fileobj=newTarFileStorage)

            patchCount = 0
            wasPatched = False

            for info in members:
                print "[*] >>> Next file in tarfile:", info.name

                if not info.isfile():
                    print info.name, 'is not a file'
                    newTarFile.addfile(info, tarFile.extractfile(info))
                    continue

                if info.size >= long(self.FileSizeMax):
                    print info.name, 'is too big'
                    newTarFile.addfile(info, tarFile.extractfile(info))
                    continue

                # Check against keywords
                keywordCheck = False

                if type(self.tarblacklist) is str:
                    if self.tarblacklist.lower() in info.name.lower():
                        keywordCheck = True

                else:
                    for keyword in self.tarblacklist:
                        if keyword.lower() in info.name.lower():
                            keywordCheck = True
                            continue

                if keywordCheck is True:
                    print "[!] Tar blacklist enforced!"
                    mitmf_logger.info('Tar blacklist enforced on {}'.format(
                        info.name))
                    continue

                # Try to patch
                extractedFile = tarFile.extractfile(info)

                if patchCount >= int(self.userConfig['TAR']['patchCount']):
                    newTarFile.addfile(info, extractedFile)
                else:
                    # write the member to a temporary file on disk so binaryGrinder can run on it
                    with tempfile.NamedTemporaryFile() as tmp:
                        shutil.copyfileobj(extractedFile, tmp)
                        tmp.flush()
                        patchResult = self.binaryGrinder(tmp.name)
                        if patchResult:
                            patchCount += 1
                            file2 = "backdoored/" + os.path.basename(tmp.name)
                            print "[*] Patching complete, adding to tar file."
                            info.size = os.stat(file2).st_size
                            with open(file2, 'rb') as f:
                                newTarFile.addfile(info, f)
                            mitmf_logger.info(
                                "{} in tar patched, adding to tarfile".format(
                                    info.name))
                            os.remove(file2)
                            wasPatched = True
                        else:
                            print "[!] Patching failed"
                            with open(tmp.name, 'rb') as f:
                                newTarFile.addfile(info, f)
                            mitmf_logger.info(
                                "{} patching failed. Keeping original file in tar."
                                .format(info.name))
                if patchCount == int(self.userConfig['TAR']['patchCount']):
                    mitmf_logger.info("Met Tar config patchCount limit.")

            # finalize the writing of the tar file first
            newTarFile.close()

            # then read the new tar file into memory
            newTarFileStorage.seek(0)
            ret = newTarFileStorage.read()
            newTarFileStorage.close()  # it's automatically deleted

            if wasPatched is False:
                # If nothing was changed return the original
                print "[*] No files were patched forwarding original file"
                self.patched.put(aTarFileBytes)
                return
            else:
                self.patched.put(ret)
                return
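tar_files above rebuilds the archive member by member, copying unpatched members and re-adding patched ones with a corrected size header. The same rewrite can be sketched purely in memory with io.BytesIO; rewrite_tar and transform below are illustrative names, not MITMf functions, and compression handling is reduced to the plain/gz/bz2 cases used above.

import io
import tarfile

def rewrite_tar(tar_bytes, transform, compression=''):
    """Return new tar bytes in which every regular file has been passed
    through transform(name, data); compression is '', 'gz' or 'bz2'."""
    in_buf = io.BytesIO(tar_bytes)
    out_buf = io.BytesIO()
    with tarfile.open(fileobj=in_buf, mode='r:' + compression) as src, \
            tarfile.open(fileobj=out_buf, mode='w:' + compression) as dst:
        for info in src.getmembers():
            if not info.isfile():
                dst.addfile(info)  # directories, links, etc. are copied as-is
                continue
            data = transform(info.name, src.extractfile(info).read())
            info.size = len(data)  # the header must match the new payload size
            dst.addfile(info, io.BytesIO(data))
    return out_buf.getvalue()

For example, rewrite_tar(tar_bytes, lambda name, data: data, 'gz') simply round-trips a gzip-compressed tar unchanged.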
Ejemplo n.º 59
0
def main():
    # Forced options from multiple sources are gathered into a shared list
    # so that the override order remains the same as on the command line.
    force_options_yaml = []

    def add_force_option_yaml(option, opt, value, parser):
        # XXX: check that YAML parses
        force_options_yaml.append(value)

    def add_force_option_file(option, opt, value, parser):
        # XXX: check that YAML parses
        with open(value, 'rb') as f:
            force_options_yaml.append(f.read())

    def add_force_option_define(option, opt, value, parser):
        tmp = value.split('=')
        if len(tmp) == 1:
            doc = {tmp[0]: True}
        elif len(tmp) == 2:
            doc = {tmp[0]: tmp[1]}
        else:
            raise Exception('invalid option value: %r' % value)
        force_options_yaml.append(yaml.safe_dump(doc))

    def add_force_option_undefine(option, opt, value, parser):
        tmp = value.split('=')
        if len(tmp) == 1:
            doc = {tmp[0]: False}
        else:
            raise Exception('invalid option value: %r' % value)
        force_options_yaml.append(yaml.safe_dump(doc))

    fixup_header_lines = []

    def add_fixup_header_line(option, opt, value, parser):
        fixup_header_lines.append(value)

    def add_fixup_header_file(option, opt, value, parser):
        with open(value, 'rb') as f:
            for line in f:
                if line[-1] == '\n':
                    line = line[:-1]
                fixup_header_lines.append(line)

    commands = [
        'autodetect-header', 'barebones-header', 'feature-documentation',
        'config-documentation'
    ]
    parser = optparse.OptionParser(
        usage='Usage: %prog [options] COMMAND',
        description=
        'Generate a duk_config.h or config option documentation based on config metadata.',
        epilog='COMMAND can be one of: ' + ', '.join(commands) + '.')
    parser.add_option('--metadata',
                      dest='metadata',
                      default=None,
                      help='metadata directory or metadata tar.gz file')
    parser.add_option(
        '--output',
        dest='output',
        default=None,
        help='output filename for C header or RST documentation file')
    parser.add_option('--platform',
                      dest='platform',
                      default=None,
                      help='platform (for "barebones-header" command)')
    parser.add_option('--compiler',
                      dest='compiler',
                      default=None,
                      help='compiler (for "barebones-header" command)')
    parser.add_option('--architecture',
                      dest='architecture',
                      default=None,
                      help='architecture (for "barebones-header" command)')
    parser.add_option(
        '--dll',
        dest='dll',
        action='store_true',
        default=False,
        help=
        'dll build of Duktape, affects symbol visibility macros especially on Windows'
    )  # FIXME: unimplemented
    parser.add_option(
        '--emit-legacy-feature-check',
        dest='emit_legacy_feature_check',
        action='store_true',
        default=False,
        help=
        'emit preprocessor checks to reject legacy feature options (DUK_OPT_xxx)'
    )
    parser.add_option(
        '--emit-config-sanity-check',
        dest='emit_config_sanity_check',
        action='store_true',
        default=False,
        help=
        'emit preprocessor checks for config option consistency (DUK_OPT_xxx)')
    parser.add_option(
        '--omit-removed-config-options',
        dest='omit_removed_config_options',
        action='store_true',
        default=False,
        help='omit removed config options from generated headers')
    parser.add_option(
        '--omit-deprecated-config-options',
        dest='omit_deprecated_config_options',
        action='store_true',
        default=False,
        help='omit deprecated config options from generated headers')
    parser.add_option('--omit-unused-config-options',
                      dest='omit_unused_config_options',
                      action='store_true',
                      default=False,
                      help='omit unused config options from generated headers')
    parser.add_option(
        '--define',
        type='string',
        dest='force_options_yaml',
        action='callback',
        callback=add_force_option_define,
        default=force_options_yaml,
        help=
        'force #define option using a C compiler like syntax, e.g. "--define DUK_USE_DEEP_C_STACK" or "--define DUK_USE_TRACEBACK_DEPTH=10"'
    )
    parser.add_option(
        '-D',
        type='string',
        dest='force_options_yaml',
        action='callback',
        callback=add_force_option_define,
        default=force_options_yaml,
        help=
        'synonym for --define, e.g. "-DDUK_USE_DEEP_C_STACK" or "-DDUK_USE_TRACEBACK_DEPTH=10"'
    )
    parser.add_option(
        '--undefine',
        type='string',
        dest='force_options_yaml',
        action='callback',
        callback=add_force_option_undefine,
        default=force_options_yaml,
        help=
        'force #undef option using a C compiler like syntax, e.g. "--undefine DUK_USE_DEEP_C_STACK"'
    )
    parser.add_option(
        '-U',
        type='string',
        dest='force_options_yaml',
        action='callback',
        callback=add_force_option_undefine,
        default=force_options_yaml,
        help='synonym for --undefine, e.g. "-UDUK_USE_DEEP_C_STACK"')
    parser.add_option(
        '--option-yaml',
        type='string',
        dest='force_options_yaml',
        action='callback',
        callback=add_force_option_yaml,
        default=force_options_yaml,
        help=
        'force option(s) using inline YAML (e.g. --option-yaml "DUK_USE_DEEP_C_STACK: true")'
    )
    parser.add_option('--option-file',
                      type='string',
                      dest='force_options_yaml',
                      action='callback',
                      callback=add_force_option_file,
                      default=force_options_yaml,
                      help='YAML file(s) providing config option overrides')
    parser.add_option(
        '--fixup-file',
        type='string',
        dest='fixup_header_lines',
        action='callback',
        callback=add_fixup_header_file,
        default=fixup_header_lines,
        help=
        'C header snippet file(s) to be appended to generated header, useful for manual option fixups'
    )
    parser.add_option(
        '--fixup-line',
        type='string',
        dest='fixup_header_lines',
        action='callback',
        callback=add_fixup_header_line,
        default=fixup_header_lines,
        help=
        'C header fixup line to be appended to generated header (e.g. --fixup-line "#define DUK_USE_FASTINT")'
    )
    parser.add_option(
        '--sanity-warning',
        dest='sanity_strict',
        action='store_false',
        default=True,
        help='emit a warning instead of #error for option sanity check issues')
    parser.add_option('--use-cpp-warning',
                      dest='use_cpp_warning',
                      action='store_true',
                      default=False,
                      help='emit a (non-portable) #warning when appropriate')
    (opts, args) = parser.parse_args()

    meta_dir = opts.metadata
    if opts.metadata is None:
        if os.path.isfile(os.path.join('.', 'genconfig_metadata.tar.gz')):
            opts.metadata = 'genconfig_metadata.tar.gz'
        elif os.path.isdir(os.path.join('.', 'config-options')):
            opts.metadata = '.'

    if opts.metadata is not None and os.path.isdir(opts.metadata):
        meta_dir = opts.metadata
        print 'Using metadata directory: %r' % meta_dir
    elif opts.metadata is not None and os.path.isfile(
            opts.metadata) and tarfile.is_tarfile(opts.metadata):
        meta_dir = get_auto_delete_tempdir()
        tar = tarfile.open(name=opts.metadata, mode='r:*')
        tar.extractall(path=meta_dir)
        print 'Using metadata tar file %r, unpacked to directory: %r' % (
            opts.metadata, meta_dir)
    else:
        raise Exception('metadata source must be a directory or a tar.gz file')

    scan_snippets(os.path.join(meta_dir, 'header-snippets'))
    scan_use_defs(os.path.join(meta_dir, 'config-options'))
    scan_opt_defs(os.path.join(meta_dir, 'feature-options'))
    scan_use_tags()
    scan_tags_meta(os.path.join(meta_dir, 'tags.yaml'))
    print('Scanned %d DUK_OPT_xxx, %d DUK_USE_XXX, %d helper snippets' %
          (len(opt_defs.keys()), len(use_defs.keys()), len(helper_snippets)))
    #print('Tags: %r' % use_tags_list)

    if len(args) == 0:
        raise Exception('missing command')
    cmd = args[0]

    if cmd == 'autodetect-header':
        # Generate a duk_config.h similar to Duktape 1.2 feature detection.
        result = generate_autodetect_duk_config_header(opts, meta_dir)
        with open(opts.output, 'wb') as f:
            f.write(result)
    elif cmd == 'barebones-header':
        # Generate a duk_config.h with default options for a specific platform,
        # compiler, and architecture.
        result = generate_barebones_duk_config_header(opts, meta_dir)
        with open(opts.output, 'wb') as f:
            f.write(result)
    elif cmd == 'feature-documentation':
        result = generate_feature_option_documentation(opts)
        with open(opts.output, 'wb') as f:
            f.write(result)
    elif cmd == 'config-documentation':
        result = generate_config_option_documentation(opts)
        with open(opts.output, 'wb') as f:
            f.write(result)
    else:
        raise Exception('invalid command: %r' % cmd)
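The genconfig example above accepts --metadata either as a directory or as a tar.gz archive that it unpacks into an auto-deleting temporary directory (get_auto_delete_tempdir is defined elsewhere in that project and not shown here). A minimal sketch of the same dispatch, using an explicit cleanup callback instead of that helper, might look as follows; resolve_metadata_dir is an illustrative name.

import os
import shutil
import tarfile
import tempfile

def resolve_metadata_dir(metadata_path):
    """Return (directory, cleanup) where `directory` holds the metadata and
    cleanup() removes any temporary unpack directory that was created."""
    if os.path.isdir(metadata_path):
        return metadata_path, lambda: None
    if os.path.isfile(metadata_path) and tarfile.is_tarfile(metadata_path):
        tmp_dir = tempfile.mkdtemp(prefix='metadata-')
        with tarfile.open(metadata_path, mode='r:*') as tar:
            tar.extractall(path=tmp_dir)
        return tmp_dir, lambda: shutil.rmtree(tmp_dir, ignore_errors=True)
    raise ValueError('metadata source must be a directory or a tar.gz file')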
def _downloader_worker_thread(thread_num,
                              q,
                              force,
                              base_url,
                              gsutil,
                              out_q,
                              ret_codes,
                              verbose,
                              extract,
                              delete=True):
    while True:
        input_sha1_sum, output_filename = q.get()
        if input_sha1_sum is None:
            return
        extract_dir = None
        if extract:
            if not output_filename.endswith('.tar.gz'):
                out_q.put('%d> Error: %s is not a tar.gz archive.' %
                          (thread_num, output_filename))
                ret_codes.put(
                    (1, '%s is not a tar.gz archive.' % (output_filename)))
                continue
            extract_dir = output_filename[:-len('.tar.gz')]
        if os.path.exists(output_filename) and not force:
            if not extract or os.path.exists(extract_dir):
                if get_sha1(output_filename) == input_sha1_sum:
                    continue
        # Check if file exists.
        file_url = '%s/%s' % (base_url, input_sha1_sum)
        (code, _, err) = gsutil.check_call('ls', file_url)
        if code != 0:
            if code == 404:
                out_q.put('%d> File %s for %s does not exist, skipping.' %
                          (thread_num, file_url, output_filename))
                ret_codes.put((1, 'File %s for %s does not exist.' %
                               (file_url, output_filename)))
            elif code == 401:
                out_q.put(
                    """%d> Failed to fetch file %s for %s due to unauthorized access,
            skipping. Try running `gsutil.py config` and pass 0 if you don't
            know your project id.""" % (thread_num, file_url, output_filename))
                ret_codes.put((
                    1,
                    'Failed to fetch file %s for %s due to unauthorized access.'
                    % (file_url, output_filename)))
            else:
                # Other error, probably auth related (bad ~/.boto, etc).
                out_q.put(
                    '%d> Failed to fetch file %s for %s, skipping. [Err: %s]' %
                    (thread_num, file_url, output_filename, err))
                ret_codes.put((1, 'Failed to fetch file %s for %s. [Err: %s]' %
                               (file_url, output_filename, err)))
            continue
        # Fetch the file.
        if verbose:
            out_q.put('%d> Downloading %s...' % (thread_num, output_filename))
        try:
            if delete:
                os.remove(
                    output_filename)  # Delete the file if it exists already.
        except OSError:
            if os.path.exists(output_filename):
                out_q.put('%d> Warning: deleting %s failed.' %
                          (thread_num, output_filename))
        code, _, err = gsutil.check_call('cp', file_url, output_filename)
        if code != 0:
            out_q.put('%d> %s' % (thread_num, err))
            ret_codes.put((code, err))
            continue

        remote_sha1 = get_sha1(output_filename)
        if remote_sha1 != input_sha1_sum:
            msg = (
                '%d> ERROR remote sha1 (%s) does not match expected sha1 (%s).'
                % (thread_num, remote_sha1, input_sha1_sum))
            out_q.put(msg)
            ret_codes.put((20, msg))
            continue

        if extract:
            if not tarfile.is_tarfile(output_filename):
                out_q.put('%d> Error: %s is not a tar.gz archive.' %
                          (thread_num, output_filename))
                ret_codes.put(
                    (1, '%s is not a tar.gz archive.' % (output_filename)))
                continue
            with tarfile.open(output_filename, 'r:gz') as tar:
                dirname = os.path.dirname(os.path.abspath(output_filename))
                # If there are long paths inside the tarball we can get extraction
                # errors on windows due to the 260 path length limit (this includes
                # pwd). Use the extended path syntax.
                if sys.platform == 'win32':
                    dirname = '\\\\?\\%s' % dirname
                if not _validate_tar_file(tar, os.path.basename(extract_dir)):
                    out_q.put('%d> Error: %s contains files outside %s.' %
                              (thread_num, output_filename, extract_dir))
                    ret_codes.put(
                        (1,
                         '%s contains invalid entries.' % (output_filename)))
                    continue
                if os.path.exists(extract_dir):
                    try:
                        shutil.rmtree(extract_dir)
                        out_q.put('%d> Removed %s...' %
                                  (thread_num, extract_dir))
                    except OSError:
                        out_q.put('%d> Warning: Can\'t delete: %s' %
                                  (thread_num, extract_dir))
                        ret_codes.put((1, 'Can\'t delete %s.' % (extract_dir)))
                        continue
                out_q.put('%d> Extracting %d entries from %s to %s' %
                          (thread_num, len(
                              tar.getmembers()), output_filename, extract_dir))
                tar.extractall(path=dirname)
        # Set executable bit.
        if sys.platform == 'cygwin':
            # Under cygwin, mark all files as executable. The executable flag in
            # Google Storage will not be set when uploading from Windows, so if
            # this script is running under cygwin and we're downloading an
            # executable, it will be unrunnable from inside cygwin without this.
            st = os.stat(output_filename)
            os.chmod(output_filename, st.st_mode | stat.S_IEXEC)
        elif sys.platform != 'win32':
            # On non-Windows platforms, key off of the custom header
            # "x-goog-meta-executable".
            code, out, err = gsutil.check_call('stat', file_url)
            if code != 0:
                out_q.put('%d> %s' % (thread_num, err))
                ret_codes.put((code, err))
            elif re.search(r'executable:\s*1', out):
                st = os.stat(output_filename)
                os.chmod(output_filename, st.st_mode | stat.S_IEXEC)
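Before extracting, the downloader above calls _validate_tar_file to make sure the archive only contains entries under the expected top-level directory; that helper is not shown in this excerpt. A sketch of the kind of path-traversal guard such a check implies, rejecting absolute paths and '..' escapes, could look like this (members_under_toplevel is an illustrative name):

import os

def members_under_toplevel(tar, toplevel):
    """Return True if every member of the open TarFile extracts under
    toplevel/ and cannot escape it via absolute paths or '..' components."""
    for member in tar.getmembers():
        name = os.path.normpath(member.name)
        if os.path.isabs(name) or name.startswith('..'):
            return False
        if name != toplevel and not name.startswith(toplevel + os.sep):
            return False
    return True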