def zip_file(zip_main_file_path, zip_main_file_name, main_directory, definitions, _totals_):
    tmp_dir = main_directory + '/temp_unzips'
    zip_list = Ziplevels()
    zip_list.path = zip_main_file_path
    zip_list.name = zip_main_file_name
    zip_list.tmp_dir = os.path.join(tmp_dir, os.path.splitext(zip_list.name)[0])
    classes = [zip_list]
    try:
        for mylists in classes:
            if not os.path.exists(mylists.tmp_dir):
                os.makedirs(mylists.tmp_dir)
            z_file = tarfile.open(mylists.path) if tarfile.is_tarfile(mylists.path) else ZipFile(mylists.path)
            with z_file as zip_dir_path:
                zip_dir_path.extractall(path=mylists.tmp_dir)
            for (tmp_dir_name, tmp_sub_dir, tmp_file_name) in os.walk(mylists.tmp_dir, topdown=True):
                for zip_file_name in tmp_file_name:
                    tmp_dir_path = os.path.join(tmp_dir_name, zip_file_name)
                    # Check the joined path, not the bare filename, so nested
                    # zip archives are actually detected.
                    if zipfile.is_zipfile(tmp_dir_path) or tarfile.is_tarfile(tmp_dir_path):
                        temp_list = Ziplevels()
                        temp_list.path = tmp_dir_path
                        temp_list.name = zip_file_name
                        temp_list.tmp_dir = os.path.splitext(tmp_dir_path)[0]
                        classes.append(temp_list)
                    else:
                        file_check(tmp_dir_path, zip_file_name, main_directory,
                                   definitions, _totals_, zip_main_file_name)
        shutil.rmtree(tmp_dir)
    except Exception as e:
        log(zip_main_file_path + ' is not a valid zipfile', str(e))
def check_volumes_result(target_folder):
    with (target_folder / "Manifest.yml").open("rt") as f:
        manifest = next(yaml.load_all(f))
    volumes = manifest["volumes"]

    assert len(volumes["project"]) == 1
    volume = volumes["project"]["care"]
    assert volume == "care.tar"
    archive = target_folder / "volumes" / "project" / volume
    assert archive.is_file()
    assert tarfile.is_tarfile(str(archive))
    assert count_dir_contents(target_folder / "volumes" / "project") == 1

    assert len(volumes["services"]) == 1
    assert len(volumes["services"]["foo"]) == 2
    volume = volumes["services"]["foo"]["/volume"]
    archive = target_folder / "volumes" / "services" / volume
    assert archive.is_file()
    assert tarfile.is_tarfile(str(archive))
    volume = volumes["services"]["foo"]["/image_volume1"]
    archive = target_folder / "volumes" / "services" / volume
    assert archive.is_file()
    assert tarfile.is_tarfile(str(archive))
    assert count_dir_contents(target_folder / "volumes" / "services") == 2

    assert len(volumes["mounted"]) == 3
    assert (target_folder / "volumes" / "mounted" / "asset.txt").is_file()
    assert (target_folder / "volumes" / "mounted" / "assets").is_dir()
    assert (target_folder / "volumes" / "mounted" / "assets" / "dummy").is_file()
    assert (target_folder / "volumes" / "mounted" / "local").is_dir()
    assert (target_folder / "volumes" / "mounted" / "local" / "dummy").is_file()
    assert count_dir_contents(target_folder / "volumes" / "mounted") == 3
def test_make_tarball(self):
    # creating something to tar
    root_dir, base_dir = self._create_files('')

    tmpdir2 = self.mkdtemp()
    # force shutil to create the directory
    os.rmdir(tmpdir2)
    # working with relative paths
    work_dir = os.path.dirname(tmpdir2)
    rel_base_name = os.path.join(os.path.basename(tmpdir2), 'archive')

    with support.change_cwd(work_dir):
        base_name = os.path.abspath(rel_base_name)
        tarball = make_archive(rel_base_name, 'gztar', root_dir, '.')

    # check if the compressed tarball was created
    self.assertEqual(tarball, base_name + '.tar.gz')
    self.assertTrue(os.path.isfile(tarball))
    self.assertTrue(tarfile.is_tarfile(tarball))
    with tarfile.open(tarball, 'r:gz') as tf:
        self.assertEqual(sorted(tf.getnames()),
                         ['.', './file1', './file2', './sub',
                          './sub/file3', './sub2'])

    # trying an uncompressed one
    with support.change_cwd(work_dir):
        tarball = make_archive(rel_base_name, 'tar', root_dir, '.')
    self.assertEqual(tarball, base_name + '.tar')
    self.assertTrue(os.path.isfile(tarball))
    self.assertTrue(tarfile.is_tarfile(tarball))
    with tarfile.open(tarball, 'r') as tf:
        self.assertEqual(sorted(tf.getnames()),
                         ['.', './file1', './file2', './sub',
                          './sub/file3', './sub2'])
def parser_check():
    dirs, files = xbmcvfs.listdir(base_dir)
    if not dirs:
        dirpackages, filespackages = xbmcvfs.listdir(parser_packages_folder)
        if filespackages:
            for fich in filespackages:
                shutil.copyfile(os.path.join(parser_packages_folder, fich),
                                os.path.join(parser_core_folder, fich))
                xbmc.sleep(100)
                import tarfile
                if tarfile.is_tarfile(os.path.join(parser_core_folder, fich)):
                    download_tools().extract(os.path.join(parser_core_folder, fich),
                                             parser_core_folder)
                    download_tools().remove(os.path.join(parser_core_folder, fich))
    else:
        dirsuserdata, files = xbmcvfs.listdir(parser_folder)
        for fich in files:
            dictionary_module = eval(readfile(os.path.join(parser_folder, fich)))
            if "url" in dictionary_module.keys():
                add_new_parser(dictionary_module["url"])
            else:
                xbmcvfs.copy(os.path.join(parser_packages_folder, fich.replace('.txt', '.tar.gz')),
                             os.path.join(parser_core_folder, fich.replace('.txt', '.tar.gz')))
                import tarfile
                if tarfile.is_tarfile(os.path.join(parser_core_folder, fich.replace('.txt', '.tar.gz'))):
                    download_tools().extract(os.path.join(parser_core_folder, fich.replace('.txt', '.tar.gz')),
                                             parser_core_folder)
                    download_tools().remove(os.path.join(parser_core_folder, fich.replace('.txt', '.tar.gz')))
    return
def runTest(self):
    """Unpack tarfile relative to srcdir"""
    try:
        # is_tarfile() returns a bool and does not raise for non-tar input,
        # so check the result instead of discarding it
        if not tarfile.is_tarfile(self.filepath):
            self.fail("%s is not a tarfile." % self.filepath)
        logger.debug("Tarfile Acquired: %s" % self.filepath)
    except IOError, err:
        self.fail("Could not read %s: %s" % (self.filepath, err))
def diff_ggl(self):
    args = self.args
    filename1, filename2, layout1, layout2 = args.f, args.F, args.l, args.L
    csvlayout1 = open(layout1, "r") if layout1 else None
    csvlayout2 = open(layout2, "r") if layout2 else None
    if tarfile.is_tarfile(filename1):
        paramconfig1, csvlayout1 = self.extract_from_ggl(filename1)
    else:
        paramconfig1 = open(filename1, "r")
    if tarfile.is_tarfile(filename2):
        paramconfig2, csvlayout2 = self.extract_from_ggl(filename2)
    else:
        paramconfig2 = open(filename2, "r")
    if (paramconfig1 is not None and paramconfig2 is not None
            and csvlayout1 is not None and csvlayout2 is not None):
        params1 = self.read_paramconfig(paramconfig1, csvlayout1)
        params2 = self.read_paramconfig(paramconfig2, csvlayout2)
        self.compare(params1, params2)
        if type(paramconfig1) is file:
            paramconfig1.close()
        if type(paramconfig2) is file:
            paramconfig2.close()
        if type(csvlayout1) is file:
            csvlayout1.close()
        if type(csvlayout2) is file:
            csvlayout2.close()
    else:
        print args_parser.parse_args(["diff", "ggl", "-h"])
    return
def is_tarfile(arg):
    """Helper function to test if a given filepath/file-like-object is
    a tar-like file.

    Limitation: if the arg is a file-like object, we use its name
    extension to decide. Valid extensions are 'tar', 'gz', 'bz', 'bz2'."""
    if isinstance(arg, str):
        # Process filepaths
        return tarfile.is_tarfile(arg)
    elif hasattr(arg, 'name'):
        # At the moment, we cannot check bytestreams for being tar files.
        # splitext() keeps the leading dot, so strip it before comparing.
        return os.path.splitext(arg.name)[-1].lstrip('.') in ['tar', 'gz', 'bz', 'bz2']
    return False
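# A quick, hedged usage sketch for the helper above; 'archive.tar.gz' is a
# hypothetical path. Note that since Python 3.9 tarfile.is_tarfile() itself
# also accepts binary file objects, so the extension fallback mainly matters
# on older interpreters.
print(is_tarfile('archive.tar.gz'))        # delegates to tarfile.is_tarfile
with open('archive.tar.gz', 'rb') as fh:
    print(is_tarfile(fh))                  # falls back to the name extension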
def unpackArchive(archiveFile, targetBaseDir, subdir):
    """Unpack archive into a directory"""
    if subdir and not subdir.endswith('/'):
        subdir += '/'

    # unpack source archive
    if tarfile.is_tarfile(archiveFile):
        tarFile = tarfile.open(archiveFile, 'r')
        members = None
        if subdir:
            members = [member for member in tarFile.getmembers()
                       if member.name.startswith(subdir)]
            if not members:
                sysExit('sub-directory %s not found in archive' % subdir)
        tarFile.extractall(targetBaseDir, members)
        tarFile.close()
    elif zipfile.is_zipfile(archiveFile):
        zipFile = zipfile.ZipFile(archiveFile, 'r')
        names = None
        if subdir:
            names = [name for name in zipFile.namelist()
                     if name.startswith(subdir)]
            if not names:
                sysExit('sub-directory %s not found in archive' % subdir)
        zipFile.extractall(targetBaseDir, names)
        zipFile.close()
    elif archiveFile.split('/')[-1].split('.')[-1] == 'xz':
        ensureCommandIsAvailable('xz')
        Popen(['xz', '-f', '-d', '-k', archiveFile]).wait()
        tar = archiveFile[:-3]
        if tarfile.is_tarfile(tar):
            tarFile = tarfile.open(tar, 'r')
            members = None
            if subdir:
                # subdir was already normalized to end with '/' above
                members = [member for member in tarFile.getmembers()
                           if member.name.startswith(subdir)]
                if not members:
                    sysExit('sub-directory %s not found in archive' % subdir)
            # pass the filtered member list, as in the branches above
            tarFile.extractall(targetBaseDir, members)
            tarFile.close()
    else:
        sysExit('Unrecognized archive type in file ' + archiveFile)
def _extract(self, filename):
    """ extractor helper """
    try:
        file_type = self._get_file_type(filename)
        opener = mode = None
        if file_type == 'zip':
            opener, mode = zipfile.ZipFile, 'r'
        elif file_type == 'gz':
            if tarfile.is_tarfile(filename):
                opener, mode = tarfile.open, 'r:gz'
        elif file_type == 'bz2':
            if tarfile.is_tarfile(filename):
                opener, mode = tarfile.open, 'r:bz2'
        if not opener:
            raise Exception("Unsupported file compression")
        cfile = opener(filename, mode)
        # if first member is dir, skip 1st container path
        if file_type == 'zip':
            members = cfile.namelist()
        else:
            members = cfile.getmembers()
        stdout = ''
        for member in members:
            if file_type == 'zip':
                member_name = member
            else:
                member_name = member.name
            stdout += "Extracted " + member_name + "\n"
        cfile.extractall(self.working_dir)
        cfile.close()
    except Exception as e:
        try:
            return self._extract_alternative(filename)
        except:
            raise Exception("Could not extract file: %s" % e)
    ret = {'out': 0, 'stderr': '', 'stdout': stdout}
    return ret
def test_create_tarfile(monkeypatch, settings, format, extension):
    settings.BACKUPED_ROOT = BACKUPED_ROOT
    try:
        os.makedirs(BACKUPED_ROOT)
    except OSError:
        pass
    filename = 'edoardo-0.0.0-201501231405' + extension
    filepath = os.path.join(BACKUPS_ROOT, filename)
    try:
        # Make sure it doesn't exist before running backup.
        os.remove(filepath)
    except OSError:
        pass
    monkeypatch.setattr('ideascube.serveradmin.backup.Backup.ROOT',
                        BACKUPS_ROOT)
    monkeypatch.setattr('ideascube.serveradmin.backup.Backup.FORMAT', format)
    monkeypatch.setattr('ideascube.serveradmin.backup.make_name',
                        lambda f: filename)
    proof_file = os.path.join(settings.BACKUPED_ROOT, 'backup.me')
    open(proof_file, mode='w')
    Backup.create()
    assert os.path.exists(filepath)
    assert tarfile.is_tarfile(filepath)
    archive = tarfile.open(filepath)
    assert './backup.me' in archive.getnames()
    archive.close()
    os.remove(filepath)
    os.remove(proof_file)
def _get_archive_filelist(filename):
    # type: (str) -> List[str]
    """Extract the list of files from a tar or zip archive.

    Args:
        filename: name of the archive

    Returns:
        Sorted list of files in the archive, excluding './'

    Raises:
        ValueError: when the file is neither a zip nor a tar archive
        FileNotFoundError: when the provided file does not exist (for Python 3)
        IOError: when the provided file does not exist (for Python 2)
    """
    names = []  # type: List[str]
    if tarfile.is_tarfile(filename):
        with tarfile.open(filename) as tar_file:
            names = sorted(tar_file.getnames())
    elif zipfile.is_zipfile(filename):
        with zipfile.ZipFile(filename) as zip_file:
            names = sorted(zip_file.namelist())
    else:
        raise ValueError("Can not get filenames from '{!s}'. "
                         "Not a tar or zip file".format(filename))
    if "./" in names:
        names.remove("./")
    return names
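# Minimal usage sketch for _get_archive_filelist(); the path is hypothetical.
try:
    for name in _get_archive_filelist('project.tar.gz'):
        print(name)
except ValueError as err:
    print(err)  # raised when the file is neither a tar nor a zip archive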
def setup_buffer(self):
    if not self.exists:
        return None
    if self.subfile is None:
        # assume a regular file or gzipped
        filename, file_extension = os.path.splitext(self.fname)
        if file_extension == '.gzip' or file_extension == '.gz':
            import gzip
            try:
                self.buffer = gzip.open(self.fname, 'r')
                self.is_gzip = True
            except:
                pass
                # print >> sys.stderr, '[e] bad gzip file?', self.fname
        else:
            self.buffer = self.fname
    else:
        import tarfile
        if tarfile.is_tarfile(self.fname):
            self.tarfile = tarfile.open(self.fname, "r:gz")
            try:
                tarinfo = self.tarfile.getmember(self.subfile)
            except:
                print >> sys.stderr, '[e] file in archive not found:', self.subfile
                tarinfo = None
                self.buffer = None
            if tarinfo is not None:
                if tarinfo.isreg():
                    self.buffer = self.tarfile.extractfile(tarinfo)
                else:
                    self.buffer = None
        else:
            self.buffer = None
def is_archive(filename):
    """ test if file is a valid archive (zip, tar or rar) """
    return tarfile.is_tarfile(filename) or \
        zipfile.is_zipfile(filename) or \
        (ARCHIVE_RAR_AVAILABLE and rarfile.is_rarfile(filename))
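# A sketch of how the ARCHIVE_RAR_AVAILABLE flag used above is typically
# derived (an assumption; the real import guard lives elsewhere in the module):
try:
    import rarfile
    ARCHIVE_RAR_AVAILABLE = True
except ImportError:
    ARCHIVE_RAR_AVAILABLE = False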
def _assert_tar_count_equals(self, file_name, count):
    if not tarfile.is_tarfile(file_name):
        # Maybe it's not a tar because it's a status message.
        fin = open(file_name, 'r')
        contents = fin.read(256)
        fin.close()
        if contents.lower().find("no ") != -1:
            self.assertEqual(0, count)
            return
        raise Exception("%s is not a tar file" % file_name)

    tar = tarfile.open(file_name)
    tmp_dir = "unit_test_tmp"
    if os.path.exists(tmp_dir):
        filenames = os.listdir(tmp_dir)
        for file in filenames:
            os.remove(os.path.join(tmp_dir, file))
        os.rmdir(tmp_dir)
    os.mkdir(tmp_dir)
    tar.extractall(path=tmp_dir)
    tar.close()
    filenames = os.listdir(tmp_dir)
    try:
        self.assertEqual(len(filenames), count)
    finally:
        # clean up
        for file in filenames:
            os.remove(os.path.join(tmp_dir, file))
        os.rmdir(tmp_dir)
def unarchive_file(archive_fpath, force_commonprefix=True):
    print('Unarchive: %r' % archive_fpath)
    if tarfile.is_tarfile(archive_fpath):
        return untar_file(archive_fpath, force_commonprefix=force_commonprefix)
    elif zipfile.is_zipfile(archive_fpath):
        return unzip_file(archive_fpath, force_commonprefix=force_commonprefix)
    elif archive_fpath.endswith('.gz') and not archive_fpath.endswith('.tar.gz'):
        """
        from utool.util_grabdata import *
        archive_fpath = '/home/joncrall/.config/utool/train-images-idx3-ubyte.gz'
        """
        # FIXME: unsure if this is general
        output_fpath = splitext(archive_fpath)[0]
        with gzip.open(archive_fpath, 'rb') as gzfile_:
            contents = gzfile_.read()
        with open(output_fpath, 'wb') as file_:
            file_.write(contents)
        return output_fpath
    # elif archive_fpath.endswith('.gz'):
    #     # This is to handle .gz files (not .tar.gz) like how MNIST is stored
    #     # Example: http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
    #     return ungz_file(archive_fpath)
    else:
        if archive_fpath.endswith('.zip') or archive_fpath.endswith('.tar.gz'):
            raise AssertionError('archive is corrupted: %r' % (archive_fpath,))
        raise AssertionError('unknown archive format: %r' % (archive_fpath,))
def unarchive(archive_path, dest):
    """Extract the contents of a tar or zip file at *archive_path* into the
    directory *dest*.

    :type archive_path: str
    :param archive_path: path to archive file
    :type dest: str
    :param dest: path to directory where archive will be extracted

    *dest* will be created if it doesn't already exist.

    tar files can be gzip compressed, bzip2 compressed, or uncompressed. Files
    within zip files can be deflated or stored.
    """
    if tarfile.is_tarfile(archive_path):
        with contextlib.closing(tarfile.open(archive_path, 'r')) as archive:
            archive.extractall(dest)
    elif zipfile.is_zipfile(archive_path):
        with contextlib.closing(zipfile.ZipFile(archive_path, 'r')) as archive:
            for name in archive.namelist():
                # the zip spec specifies that front slashes are always
                # used as directory separators
                dest_path = os.path.join(dest, *name.split('/'))
                # now, split out any dirname and filename and create
                # one and/or the other
                dirname, filename = os.path.split(dest_path)
                if dirname and not os.path.exists(dirname):
                    os.makedirs(dirname)
                if filename:
                    with open(dest_path, 'wb') as dest_file:
                        dest_file.write(archive.read(name))
    else:
        raise IOError('Unknown archive type: %s' % (archive_path,))
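# Hedged usage sketch for unarchive(); both paths are placeholders.
try:
    unarchive('/tmp/bundle.tar.gz', '/tmp/bundle')
except IOError as err:
    print(err)  # raised for unrecognized archive types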
def archive_open(name):
    if tarfile.is_tarfile(name):
        return tarfile.open(name)
    elif zipfile.is_zipfile(name):
        return zipfile.ZipFile(name)
    else:
        return None
def archive_get_type(name):
    if tarfile.is_tarfile(name):
        return 'tar'
    elif zipfile.is_zipfile(name):
        return 'zip'
    else:
        return None
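# Sketch of how the two helpers above compose; 'bundle.tar' is hypothetical.
# Both TarFile and ZipFile are context managers, so `with` closes the handle.
archive = archive_open('bundle.tar')
if archive is not None:
    with archive:
        kind = archive_get_type('bundle.tar')
        names = archive.getnames() if kind == 'tar' else archive.namelist()
        print(kind, len(names))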
def read_packages(self):
    print("Reading {0}...".format(self._cache_db), end="", flush=True)
    if not os.path.exists(self._cache_db):
        print(" not found!")
        return False
    if not tarfile.is_tarfile(self._cache_db):
        print(" not a tar!")
        return False
    tar = tarfile.open(self._cache_db)
    pkg_info = {}
    self._pkgs = {}
    for info in tar.getmembers():
        if not info.isfile():
            continue
        (binpkg_name, file_name) = info.name.split("/")
        pi = pkg_info.get(binpkg_name, {})
        with tar.extractfile(info) as file:
            pi[file_name] = [x.decode("utf-8").rstrip() for x in file.readlines()]
        if len(pi.keys() & {"desc", "depends"}) == 2:
            BinaryRepo.BinPkg(self, pi["desc"], pi["depends"])
            del pkg_info[binpkg_name]
            continue
        pkg_info[binpkg_name] = pi
    if len(pkg_info) != 0:
        raise Exception("Incomplete packages in DB")
    print(" done")
def decompress(filename, out_dir='/tmp/decompressed'):
    """
    Given a tar.gz or a zip, extract the contents and return a list of files.

    If the out_dir already exists, we skip decompression and just return the
    files inside that dir. Otherwise it will be created from scratch and
    filled with the files from the compressed file.
    """
    if os.path.exists(out_dir):
        return glob.glob(os.path.join(out_dir, '*'))
    os.makedirs(out_dir)
    del_dir = False
    fn = filename  # alias
    try:
        if zipfile.is_zipfile(fn):
            zipfile.ZipFile(fn, 'r').extractall(out_dir)
        elif tarfile.is_tarfile(fn):
            tarfile.open(fn, 'r').extractall(out_dir)
        else:
            raise ValueError('Invalid file type - must be tar.gz or zip')
    except Exception as e:
        del_dir = True  # delete the partially created out_dir
        raise e  # pass exception through
    finally:
        if del_dir:
            shutil.rmtree(out_dir)
    return [os.path.join(out_dir, f) for f in os.listdir(out_dir)]
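# Usage sketch for decompress(); the paths are hypothetical. Rerunning with
# the same out_dir returns the already-extracted files instead of extracting
# again.
files = decompress('/tmp/data.tar.gz', out_dir='/tmp/data')
print(len(files))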
def extract(path, extdir=None, delete=False):
    """
    Takes in a tar or zip file and extracts it to extdir.
    If extdir is not specified, extracts to path.
    If delete is set to True, deletes the bundle at path.
    Returns the list of top level files that were extracted.
    """
    if zipfile.is_zipfile(path):
        bundle = zipfile.ZipFile(path)
        namelist = bundle.namelist()
    elif tarfile.is_tarfile(path):
        bundle = tarfile.open(path)
        namelist = bundle.getnames()
    else:
        return
    if extdir is None:
        extdir = os.path.dirname(path)
    elif not os.path.exists(extdir):
        os.makedirs(extdir)
    bundle.extractall(path=extdir)
    bundle.close()
    if delete:
        os.remove(path)
    return [os.path.join(extdir, name) for name in namelist
            if len(name.rstrip(os.sep).split(os.sep)) == 1]
def repackage_dmg(infile, output):
    if not tarfile.is_tarfile(infile):
        raise Exception("Input file %s is not a valid tarfile." % infile)

    tmpdir = tempfile.mkdtemp()
    try:
        with tarfile.open(infile) as tar:
            tar.extractall(path=tmpdir)

        # Remove the /Applications symlink. If we don't, an rsync command in
        # create_dmg() will break, and create_dmg() re-creates the symlink anyway.
        try:
            os.remove(mozpath.join(tmpdir, ' '))
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise

        volume_name = get_application_ini_value(tmpdir, 'App', 'CodeName')

        # The extra_files argument is empty [] because they are already a part
        # of the original dmg produced by the build, and they remain in the
        # tarball generated by the signing task.
        create_dmg(tmpdir, output, volume_name, [])
    finally:
        shutil.rmtree(tmpdir)
def create_install_repo_from_tgz_node(host_string, *tgzs, **kwargs):
    """Create contrail repos from each tgz file in the given node
       * tgzs can be absolute/relative paths or a pattern
    """
    # verify tgz's availability
    cant_use = []
    usable_tgz_files = []
    for tgz in tgzs:
        tgz_files = os.path.abspath(os.path.expanduser(tgz))
        tgz_file_list = glob.glob(tgz_files)
        for tgz_file in tgz_file_list:
            if not os.access(tgz_file, os.R_OK):
                cant_use.append(tgz_file)
            elif not tarfile.is_tarfile(tgz_file):
                cant_use.append(tgz_file)
            else:
                usable_tgz_files.append(tgz_file)
    if len(cant_use) != 0:
        print "ERROR: TGZ files mentioned below are not readable,", \
              "not valid tgz files, or do not exist"
        print "\n".join(cant_use)
    for tgz in usable_tgz_files:
        with settings(host_string=host_string, warn_only=True):
            os_type = detect_ostype()
            if os_type in ['centos', 'fedora', 'redhat', 'centoslinux']:
                execute(create_yum_repo_from_tgz_node, tgz, host_string, **kwargs)
            elif os_type in ['ubuntu']:
                execute(create_apt_repo_from_tgz_node, tgz, host_string, **kwargs)
def unpack_file(filename, location, content_type, link):
    filename = os.path.realpath(filename)
    if (content_type == 'application/zip'
            or filename.lower().endswith(ZIP_EXTENSIONS)
            or zipfile.is_zipfile(filename)):
        unzip_file(filename, location,
                   flatten=not filename.endswith('.whl'))
    elif (content_type == 'application/x-gzip'
            or tarfile.is_tarfile(filename)
            or filename.lower().endswith(
                TAR_EXTENSIONS + BZ2_EXTENSIONS + XZ_EXTENSIONS)):
        untar_file(filename, location)
    elif (content_type and content_type.startswith('text/html')
            and is_svn_page(file_contents(filename))):
        # We don't really care about this
        from pip.vcs.subversion import Subversion
        Subversion('svn+' + link.url).unpack(location)
    else:
        # FIXME: handle?
        # FIXME: magic signatures?
        logger.critical(
            'Cannot unpack file %s (downloaded from %s, content-type: %s); '
            'cannot detect archive format',
            filename, location, content_type,
        )
        raise InstallationError(
            'Cannot determine archive format of %s' % location
        )
def install(src, dest):
    """Install a zip, exe, tar.gz, tar.bz2 or dmg file, and return the path
    of the installation folder.

    :param src: Path to the install file
    :param dest: Path to install to (to ensure we do not overwrite any
        existent files the folder should not exist yet)
    """
    src = os.path.realpath(src)
    dest = os.path.realpath(dest)
    if not is_installer(src):
        raise InvalidSource(src + ' is not a valid installer file.')
    if not os.path.exists(dest):
        os.makedirs(dest)
    trbk = None
    try:
        install_dir = None
        if zipfile.is_zipfile(src) or tarfile.is_tarfile(src):
            install_dir = mozfile.extract(src, dest)[0]
        elif src.lower().endswith('.dmg'):
            install_dir = _install_dmg(src, dest)
        elif src.lower().endswith('.exe'):
            install_dir = _install_exe(src, dest)
        return install_dir
    except Exception, ex:
        cls, exc, trbk = sys.exc_info()
        error = InstallError('Failed to install "%s (%s)"' % (src, str(ex)))
        raise InstallError, error, trbk
def unzip_archive(archive):
    """
    Unzips an archive into a temporary directory.
    Returns a link to that directory.

    Arguments:
    archive -- the path to an archive file
    """
    tmpdir = os.path.join(tempfile.gettempdir(), os.path.basename(archive))
    assert tmpdir != archive  # That wouldn't work out
    if os.path.exists(tmpdir):
        # files are already extracted
        pass
    else:
        if tarfile.is_tarfile(archive):
            print 'Extracting tarfile ...'
            with tarfile.open(archive) as tf:
                tf.extractall(path=tmpdir)
        elif zipfile.is_zipfile(archive):
            print 'Extracting zipfile ...'
            with zipfile.ZipFile(archive) as zf:
                zf.extractall(path=tmpdir)
        else:
            raise ValueError('Unknown file type for %s' % os.path.basename(archive))
    return tmpdir
def handle_rules_in_tar(self, f):
    if not tarfile.is_tarfile(f.name):
        raise OSError("Invalid tar file")

    self.updated_date = timezone.now()
    self.first_run = False

    repo = self.get_git_repo(delete=True)

    f.seek(0)
    # extract file
    tfile = tarfile.open(fileobj=f)

    # FIXME This test is only for rules archive
    for member in tfile.getmembers():
        if not member.name.startswith("rules"):
            raise SuspiciousOperation("Suspect tar file contains an invalid name '%s'" % (member.name))
    source_git_dir = os.path.join(settings.GIT_SOURCES_BASE_DIRECTORY, str(self.pk))
    tfile.extractall(path=source_git_dir)

    index = repo.index
    if len(index.diff(None)) or self.first_run:
        os.environ['USERNAME'] = '******'
        index.add(["rules"])
        message = 'source version at %s' % (self.updated_date)
        index.commit(message)

    self.save()
    # Now we must update SourceAtVersion for this source
    # or create it if needed
    self.create_sourceatversion()
    # Get categories
    self.get_categories(tfile)
def can_read(cls, filename):
    """
    Given an archive filename, returns True if this class can read and
    process the archive format of that file.
    """
    return tarfile.is_tarfile(filename)
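# Sketch of the dispatch pattern a classmethod like can_read() supports:
# try each handler class and return the first one that recognizes the file.
# `handlers` and the call site are assumptions for illustration.
def find_reader(filename, handlers):
    for handler in handlers:
        if handler.can_read(filename):
            return handler
    return None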
def extract_and_process(file_name, callback, *additional_args):
    """
    Extracts a tar file and runs 'callback' on all files inside

    file_name: tar file to extract
    callback: function to run on all files
    additional_args: optional, additional arguments for callback
    """
    folder_name = os.path.basename(file_name).split('.')[0]
    if os.path.exists(folder_name):
        print "This script will delete the folder " + \
              "'%s' and all its contents" % folder_name
        are_you_sure()
        process_folder(folder_name, os.remove)
        os.rmdir(folder_name)
    os.mkdir(folder_name)
    try:
        print "Extracting %s to %s" % (file_name, folder_name)
        if not tarfile.is_tarfile(file_name):
            raise Exception("Not a tarfile")
        tar = tarfile.open(file_name)
        tar.extractall(folder_name)
        tar.close()
        print "Extraction successful."
        process_folder(folder_name, callback, *additional_args)
    finally:
        process_folder(folder_name, os.remove)
        os.rmdir(folder_name)
def is_tar(path, file):
    try:
        if tarfile.is_tarfile(os.path.join(path, file)):
            return True
    except OSError, e:
        logging.error("Error in is_tar for '%s': %s" % (file, e))
        # re-raise the original OSError rather than a new, empty one
        raise
def do_get_s3_archive(profile, region, bucket, table, archive):
    """
    Fetch latest file named filename from S3.

    Bucket must exist prior to running this function.
    filename is args.dumpPath. File would be "args.dumpPath" with
    suffix .tar.bz2 or .zip
    """

    s3 = _get_aws_client(profile, region, "s3")

    if archive:
        if archive == "tar":
            archive_type = "tar.bz2"
        else:
            archive_type = "zip"

    # Make sure bucket exists before continuing
    try:
        s3.head_bucket(Bucket=bucket)
    except botocore.exceptions.ClientError as e:
        logging.exception("S3 bucket " + bucket + " does not exist. "
                          "Can't get backup file\n\n" + str(e))
        sys.exit(1)

    try:
        contents = s3.list_objects_v2(Bucket=bucket, Prefix=args.dumpPath)
    except botocore.exceptions.ClientError as e:
        logging.exception("Issue listing contents of bucket " + bucket +
                          "\n\n" + str(e))
        sys.exit(1)

    # Script will always overwrite older backup. Bucket versioning stores
    # multiple backups. Therefore, just get item from bucket based on table
    # name since that's what we name the files.
    filename = None
    for d in contents["Contents"]:
        if d["Key"] == "{}/{}.{}".format(args.dumpPath, table, archive_type):
            filename = d["Key"]

    if not filename:
        logging.exception("Unable to find file to restore from. "
                          "Confirm the name of the table you're restoring.")
        sys.exit(1)

    output_file = "/tmp/" + os.path.basename(filename)
    logging.info("Downloading file " + filename + " to " + output_file)
    s3.download_file(bucket, filename, output_file)

    # Extract archive based on suffix
    if tarfile.is_tarfile(output_file):
        try:
            logging.info("Extracting tar file...")
            with tarfile.open(name=output_file, mode="r:bz2") as a:
                a.extractall(path=".")
        except tarfile.ReadError as e:
            logging.exception("Error reading downloaded archive\n\n" + str(e))
            sys.exit(1)
        except tarfile.ExtractError as e:
            # ExtractError is raised for non-fatal errors on extract method
            logging.error("Error during extraction: " + str(e))
    # Assuming zip file here since we're only supporting tar and zip at this time
    else:
        try:
            logging.info("Extracting zip file...")
            with zipfile.ZipFile(output_file, "r") as z:
                z.extractall(path=".")
        except zipfile.BadZipFile as e:
            logging.exception("Problem extracting zip file\n\n" + str(e))
            sys.exit(1)
def uncompress_file(func, filename, *args, **kwargs):
    """
    Decorator used for temporary uncompressing file if .gz or .bz2 archive.
    """
    if not kwargs.get('check_compression', True):
        return func(filename, *args, **kwargs)
    if not isinstance(filename, str):
        return func(filename, *args, **kwargs)
    elif not Path(filename).exists():
        msg = "File not found '%s'" % (filename)
        raise IOError(msg)
    # check if we got a compressed file or archive
    obj_list = []
    if tarfile.is_tarfile(filename):
        try:
            # reading with transparent compression
            with tarfile.open(filename, 'r|*') as tar:
                for tarinfo in tar:
                    # only handle regular files
                    if not tarinfo.isfile():
                        continue
                    data = tar.extractfile(tarinfo).read()
                    # Skip empty files - we don't need them no matter what
                    # and it guards against rare cases where waveforms files
                    # are also slightly valid tar-files.
                    if not data:
                        continue
                    obj_list.append(data)
        except Exception:
            pass
    elif zipfile.is_zipfile(filename):
        try:
            zip = zipfile.ZipFile(filename)
            obj_list = [zip.read(name) for name in zip.namelist()]
        except Exception:
            pass
    elif filename.endswith('.bz2'):
        # bz2 module
        try:
            import bz2
            with open(filename, 'rb') as fp:
                obj_list.append(bz2.decompress(fp.read()))
        except Exception:
            pass
    elif filename.endswith('.gz'):
        # gzip module
        try:
            import gzip
            with gzip.open(filename, 'rb') as fp:
                obj_list.append(fp.read())
        except Exception:
            pass
    # handle results
    if obj_list:
        # write results to temporary files
        result = None
        for obj in obj_list:
            with NamedTemporaryFile() as tempfile:
                tempfile._fileobj.write(obj)
                stream = func(tempfile.name, *args, **kwargs)
            # just add other stream objects to first stream
            if result is None:
                result = stream
            else:
                result += stream
    else:
        # no compressions
        result = func(filename, *args, **kwargs)
    return result
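# One way the wrapper above can be applied (a sketch, not the project's actual
# wiring): bind a reader with functools.partial so callers keep the plain
# read(filename) signature while gaining transparent decompression.
from functools import partial

def _read_raw(filename, **kwargs):
    # placeholder reader; the real one returns stream objects supporting +=
    with open(filename, 'rb') as fp:
        return fp.read()

read = partial(uncompress_file, _read_raw)
# read('records.tar')  -> reader is called once per extracted member
# read('records.bin')  -> reader is called directly on the file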
def DownloadURL(url, context, force, dontExtract=None):
    """Download and extract the archive file at given URL to the
    source directory specified in the context.

    dontExtract may be a sequence of path prefixes that will
    be excluded when extracting the archive.

    Returns the absolute path to the directory where files have
    been extracted."""
    with CurrentWorkingDirectory(context.srcDir):
        # Extract filename from URL and see if file already exists.
        filename = url.split("/")[-1]
        if force and os.path.exists(filename):
            os.remove(filename)

        if os.path.exists(filename):
            PrintInfo("{0} already exists, skipping download"
                      .format(os.path.abspath(filename)))
        else:
            PrintInfo("Downloading {0} to {1}"
                      .format(url, os.path.abspath(filename)))

            # To work around occasional hiccups with downloading from websites
            # (SSL validation errors, etc.), retry a few times if we don't
            # succeed in downloading the file.
            maxRetries = 5
            lastError = None

            # Download to a temporary file and rename it to the expected
            # filename when complete. This ensures that incomplete downloads
            # will be retried if the script is run again.
            tmpFilename = filename + ".tmp"
            if os.path.exists(tmpFilename):
                os.remove(tmpFilename)

            for i in xrange(maxRetries):
                try:
                    context.downloader(url, tmpFilename)
                    break
                except Exception as e:
                    PrintCommandOutput(
                        "Retrying download due to error: {err}\n".format(err=e))
                    lastError = e
            else:
                errorMsg = str(lastError)
                if "SSL: TLSV1_ALERT_PROTOCOL_VERSION" in errorMsg:
                    errorMsg += ("\n\n"
                                 "Your OS or version of Python may not support "
                                 "TLS v1.2+, which is required for downloading "
                                 "files from certain websites. This support "
                                 "was added in Python 2.7.9."
                                 "\n\n"
                                 "You can use curl to download dependencies "
                                 "by installing it in your PATH and re-running "
                                 "this script.")
                raise RuntimeError("Failed to download {url}: {err}"
                                   .format(url=url, err=errorMsg))

            shutil.move(tmpFilename, filename)

        # Open the archive and retrieve the name of the top-most directory.
        # This assumes the archive contains a single directory with all
        # of the contents beneath it.
        archive = None
        rootDir = None
        members = None
        try:
            if tarfile.is_tarfile(filename):
                archive = tarfile.open(filename)
                rootDir = archive.getnames()[0].split('/')[0]
                if dontExtract != None:
                    members = (m for m in archive.getmembers()
                               if not any((fnmatch.fnmatch(m.name, p)
                                           for p in dontExtract)))
            elif zipfile.is_zipfile(filename):
                archive = zipfile.ZipFile(filename)
                rootDir = archive.namelist()[0].split('/')[0]
                if dontExtract != None:
                    # ZipFile has no getnames(); use namelist() as above
                    members = (m for m in archive.namelist()
                               if not any((fnmatch.fnmatch(m, p)
                                           for p in dontExtract)))
            else:
                raise RuntimeError("unrecognized archive file type")

            with archive:
                extractedPath = os.path.abspath(rootDir)

                if force and os.path.isdir(extractedPath):
                    shutil.rmtree(extractedPath)

                if os.path.isdir(extractedPath):
                    PrintInfo("Directory {0} already exists, skipping extract"
                              .format(extractedPath))
                else:
                    PrintInfo("Extracting archive to {0}".format(extractedPath))

                    # Extract to a temporary directory then move the contents
                    # to the expected location when complete. This ensures that
                    # incomplete extracts will be retried if the script is run
                    # again.
                    tmpExtractedPath = os.path.abspath("extract_dir")
                    if os.path.isdir(tmpExtractedPath):
                        shutil.rmtree(tmpExtractedPath)

                    archive.extractall(tmpExtractedPath, members=members)

                    shutil.move(os.path.join(tmpExtractedPath, rootDir),
                                extractedPath)
                    shutil.rmtree(tmpExtractedPath)

                return extractedPath
        except Exception as e:
            # If extraction failed for whatever reason, assume the
            # archive file was bad and move it aside so that re-running
            # the script will try downloading and extracting again.
            shutil.move(filename, filename + ".bad")
            raise RuntimeError("Failed to extract archive {filename}: {err}"
                               .format(filename=filename, err=e))
import tarfile

for f_name in ['hello.py', 'work.tar.gz', 'welcome.py', 'nofile.tar', 'sample.tar.xz']:
    try:
        print('{:} {}'.format(f_name, tarfile.is_tarfile(f_name)))
    except IOError as err:
        print('{:} {}'.format(f_name, err))
def cached_path(
    url_or_filename: Union[str, PathLike],
    cache_dir: Union[str, Path] = None,
    extract_archive: bool = False,
    force_extract: bool = False,
) -> str:
    """
    Given something that might be a URL (or might be a local path),
    determine which. If it's a URL, download the file and cache it, and
    return the path to the cached file. If it's already a local path,
    make sure the file exists and then return the path.

    # Parameters

    url_or_filename : `Union[str, Path]`
        A URL or local file to parse and possibly download.

    cache_dir : `Union[str, Path]`, optional (default = `None`)
        The directory to cache downloads.

    extract_archive : `bool`, optional (default = `False`)
        If `True`, then zip or tar.gz archives will be automatically extracted.
        In which case the directory is returned.

    force_extract : `bool`, optional (default = `False`)
        If `True` and the file is an archive file, it will be extracted
        regardless of whether or not the extracted directory already exists.
    """
    if cache_dir is None:
        cache_dir = CACHE_DIRECTORY

    cache_dir = os.path.expanduser(cache_dir)
    os.makedirs(cache_dir, exist_ok=True)

    if isinstance(url_or_filename, PathLike):
        url_or_filename = str(url_or_filename)

    file_path: str

    # If we're using the /a/b/foo.zip!c/d/file.txt syntax, handle it here.
    exclamation_index = url_or_filename.find("!")
    if extract_archive and exclamation_index >= 0:
        archive_path = url_or_filename[:exclamation_index]
        file_name = url_or_filename[exclamation_index + 1:]

        # Call 'cached_path' recursively now to get the local path to the archive itself.
        cached_archive_path = cached_path(archive_path, cache_dir, True, force_extract)
        if not os.path.isdir(cached_archive_path):
            raise ValueError(
                f"{url_or_filename} uses the ! syntax, but does not specify an archive file."
            )

        # Now return the full path to the desired file within the extracted archive,
        # provided it exists.
        file_path = os.path.join(cached_archive_path, file_name)
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"file {file_name} not found within {archive_path}")

        return file_path

    parsed = urlparse(url_or_filename)
    extraction_path: Optional[str] = None

    if parsed.scheme in ("http", "https", "s3"):
        # URL, so get it from the cache (downloading if necessary)
        file_path = get_from_cache(url_or_filename, cache_dir)

        if extract_archive and (is_zipfile(file_path) or tarfile.is_tarfile(file_path)):
            # This is the path the file should be extracted to.
            # For example ~/.allennlp/cache/234234.21341 -> ~/.allennlp/cache/234234.21341-extracted
            extraction_path = file_path + "-extracted"
    else:
        url_or_filename = os.path.expanduser(url_or_filename)

        if os.path.exists(url_or_filename):
            # File, and it exists.
            file_path = url_or_filename
            # Normalize the path.
            url_or_filename = os.path.abspath(url_or_filename)

            if extract_archive and (is_zipfile(file_path) or tarfile.is_tarfile(file_path)):
                # We'll use a unique directory within the cache to extract the archive to.
                # The name of the directory is a hash of the resource file path and its
                # modification time. That way, if the file changes, we'll know when to
                # extract it again.
                extraction_name = (
                    _resource_to_filename(url_or_filename, str(os.path.getmtime(file_path)))
                    + "-extracted"
                )
                extraction_path = os.path.join(cache_dir, extraction_name)
        elif parsed.scheme == "":
            # File, but it doesn't exist.
            raise FileNotFoundError(f"file {url_or_filename} not found")
        else:
            # Something unknown
            raise ValueError(f"unable to parse {url_or_filename} as a URL or as a local path")

    if extraction_path is not None:
        # If the extracted directory already exists (and is non-empty), then no
        # need to extract again unless `force_extract=True`.
        if os.path.isdir(extraction_path) and os.listdir(extraction_path) and not force_extract:
            return extraction_path

        # Extract it.
        with FileLock(extraction_path + ".lock"):
            logger.info("Extracting %s to %s", url_or_filename, extraction_path)
            shutil.rmtree(extraction_path, ignore_errors=True)

            # We extract first to a temporary directory in case something goes wrong
            # during the extraction process so we don't end up with a corrupted cache.
            tmp_extraction_dir = tempfile.mkdtemp(dir=os.path.split(extraction_path)[0])
            try:
                if is_zipfile(file_path):
                    with ZipFile(file_path, "r") as zip_file:
                        zip_file.extractall(tmp_extraction_dir)
                else:
                    tar_file = tarfile.open(file_path)
                    tar_file.extractall(tmp_extraction_dir)
                    tar_file.close()

                # Extraction was successful, rename temp directory to final
                # cache directory and dump the meta data.
                os.replace(tmp_extraction_dir, extraction_path)
                meta = _Meta(
                    resource=url_or_filename,
                    cached_path=extraction_path,
                    creation_time=time.time(),
                    extraction_dir=True,
                    size=_get_resource_size(extraction_path),
                )
                meta.to_file()
            finally:
                shutil.rmtree(tmp_extraction_dir, ignore_errors=True)

        return extraction_path

    return file_path
def __init__(self,
             file_uri: str,
             encoding: str = DEFAULT_ENCODING,
             cache_dir: str = None) -> None:

    self.uri = file_uri
    self._encoding = encoding
    self._cache_dir = cache_dir
    self._archive_handle: Any = None  # only if the file is inside an archive

    main_file_uri, path_inside_archive = parse_embeddings_file_uri(file_uri)
    main_file_local_path = cached_path(main_file_uri, cache_dir=cache_dir)

    if zipfile.is_zipfile(main_file_local_path):  # ZIP archive
        self._open_inside_zip(main_file_uri, path_inside_archive)
    elif tarfile.is_tarfile(main_file_local_path):  # TAR archive
        self._open_inside_tar(main_file_uri, path_inside_archive)
    else:  # all the other supported formats, including uncompressed files
        if path_inside_archive:
            # use % interpolation, not string concatenation with '%s'
            raise ValueError('Unsupported archive format: %s' % main_file_uri)

        # All the python packages for compressed files share the same interface of io.open
        extension = get_file_extension(main_file_uri)

        # Some systems don't have support for all of these libraries, so we import them only
        # when necessary.
        package = None
        if extension in ['.txt', '.vec']:
            package = io
        elif extension == '.gz':
            import gzip
            package = gzip
        elif extension == ".bz2":
            import bz2
            package = bz2
        elif extension == ".lzma":
            import lzma
            package = lzma

        if package is None:
            logger.warning(
                'The embeddings file has an unknown file extension "%s". '
                'We will assume the file is an (uncompressed) text file',
                extension)
            package = io

        self._handle = package.open(main_file_local_path, 'rt',
                                    encoding=encoding)  # type: ignore

    # To use this with tqdm we'd like to know the number of tokens. It's possible that the
    # first line of the embeddings file contains this: if it does, we want to start iteration
    # from the 2nd line, otherwise we want to start from the 1st.
    # Unfortunately, once we read the first line, we cannot move back the file iterator
    # because the underlying file may be "not seekable"; we use itertools.chain instead.
    first_line = next(self._handle)  # this moves the iterator forward
    self.num_tokens = EmbeddingsTextFile._get_num_tokens_from_first_line(first_line)
    if self.num_tokens:
        # the first line is a header line: start iterating from the 2nd line
        self._iterator = self._handle
    else:
        # the first line is not a header line: start iterating from the 1st line
        self._iterator = itertools.chain([first_line], self._handle)
def dosub(sub):
    sub.set_processing_started()
    sub.save()
    print('Submission disk file:', sub.disk_file)

    if sub.disk_file is None:
        logmsg('Sub %i: retrieving URL' % (sub.id), sub.url)
        (fn, headers) = urllib.urlretrieve(sub.url)
        logmsg('Sub %i: wrote URL to file' % (sub.id), fn)
        df = DiskFile.from_file(fn, Image.ORIG_COLLECTION)
        logmsg('Created DiskFile', df)
        # Try to split the URL into a filename component and save it
        p = urlparse(sub.url)
        p = p.path
        if p:
            s = p.split('/')
            origname = s[-1]
            sub.original_filename = origname
        df.save()
        sub.disk_file = df
        sub.save()
        logmsg('Saved DiskFile', df)
    else:
        logmsg('uploaded disk file for this submission is ' + str(sub.disk_file))

    df = sub.disk_file
    fn = df.get_path()
    logmsg('DiskFile path ' + fn)

    original_filename = sub.original_filename

    # check if file is a gzipped file
    try:
        gzip_file = gzip.open(fn)
        f, tempfn = tempfile.mkstemp()
        os.close(f)
        f = open(tempfn, 'wb')
        # should fail on the following line if not a gzip file
        f.write(gzip_file.read())
        f.close()
        gzip_file.close()
        df = DiskFile.from_file(tempfn, 'uploaded-gunzip')
        i = original_filename.find('.gz')
        if i != -1:
            original_filename = original_filename[:i]
        logmsg('extracted gzip file %s' % original_filename)
        # fn = tempfn
        fn = df.get_path()
    except:
        # not a gzip file
        pass

    is_tar = False
    try:
        is_tar = tarfile.is_tarfile(fn)
    except:
        pass

    if is_tar:
        logmsg('File %s: tarball' % fn)
        tar = tarfile.open(fn)
        dirnm = tempfile.mkdtemp()
        for tarinfo in tar.getmembers():
            if tarinfo.isfile():
                logmsg('extracting file %s' % tarinfo.name)
                tar.extract(tarinfo, dirnm)
                tempfn = os.path.join(dirnm, tarinfo.name)
                df = DiskFile.from_file(tempfn, 'uploaded-untar')
                # create Image object
                img = get_or_create_image(df)
                # create UserImage object.
                if img:
                    create_user_image(sub, img, tarinfo.name)
        tar.close()
        shutil.rmtree(dirnm, ignore_errors=True)
    else:
        # assume file is single image
        logmsg('File %s: single file' % fn)
        # create Image object
        img = get_or_create_image(df)
        logmsg('File %s: created Image %s' % (fn, str(img)))
        # create UserImage object.
        if img:
            logmsg('File %s: Image id %i' % (fn, img.id))
            uimg = create_user_image(sub, img, original_filename)
            logmsg('Image %i: created UserImage %i' % (img.id, uimg.id))

    sub.set_processing_finished()
    sub.save()
    return sub.id
def read_part(file_path, size=DATA_MAX_BUFF, encoding=CODING_TYPE):
    """Read the file contents in segments."""
    if tarfile.is_tarfile(file_path):
        return read_tar_gz_file_part(file_path, size, encoding)
    return read_normal_file_part(file_path, size, encoding)
def cached_path(
    url_or_filename,
    cache_dir=None,
    force_download=False,
    proxies=None,
    resume_download=False,
    user_agent=None,
    extract_compressed_file=False,
    force_extract=False,
    local_files_only=False,
) -> Optional[str]:
    """
    Given something that might be a URL (or might be a local path),
    determine which. If it's a URL, download the file and cache it, and
    return the path to the cached file. If it's already a local path,
    make sure the file exists and then return the path.

    Args:
        cache_dir: specify a cache directory to save the file to (overwrite the default cache dir).
        force_download: if True, re-download the file even if it's already cached in the cache dir.
        resume_download: if True, resume the download if an incompletely received file is found.
        user_agent: Optional string or dict that will be appended to the user-agent on remote requests.
        extract_compressed_file: if True and the path points to a zip or tar file, extract the compressed
            file in a folder along the archive.
        force_extract: if True when extract_compressed_file is True and the archive was already extracted,
            re-extract the archive and override the folder where it was extracted.

    Return:
        None in case of non-recoverable file (non-existent or inaccessible url + no cache on disk).
        Local path (string) otherwise
    """
    if cache_dir is None:
        cache_dir = TRANSFORMERS_CACHE
    if isinstance(url_or_filename, Path):
        url_or_filename = str(url_or_filename)
    if isinstance(cache_dir, Path):
        cache_dir = str(cache_dir)

    if is_remote_url(url_or_filename):
        # URL, so get it from the cache (downloading if necessary)
        output_path = get_from_cache(
            url_or_filename,
            cache_dir=cache_dir,
            force_download=force_download,
            proxies=proxies,
            resume_download=resume_download,
            user_agent=user_agent,
            local_files_only=local_files_only,
        )
    elif os.path.exists(url_or_filename):
        # File, and it exists.
        output_path = url_or_filename
    elif urlparse(url_or_filename).scheme == "":
        # File, but it doesn't exist.
        raise EnvironmentError("file {} not found".format(url_or_filename))
    else:
        # Something unknown
        raise ValueError("unable to parse {} as a URL or as a local path".format(url_or_filename))

    if extract_compressed_file:
        if not is_zipfile(output_path) and not tarfile.is_tarfile(output_path):
            return output_path

        # Path where we extract compressed archives
        # We avoid '.' in dir name and add "-extracted" at the end: "./model.zip" => "./model-zip-extracted/"
        output_dir, output_file = os.path.split(output_path)
        output_extract_dir_name = output_file.replace(".", "-") + "-extracted"
        output_path_extracted = os.path.join(output_dir, output_extract_dir_name)

        if os.path.isdir(output_path_extracted) and os.listdir(output_path_extracted) and not force_extract:
            return output_path_extracted

        # Prevent parallel extractions
        lock_path = output_path + ".lock"
        with FileLock(lock_path):
            shutil.rmtree(output_path_extracted, ignore_errors=True)
            os.makedirs(output_path_extracted)
            if is_zipfile(output_path):
                with ZipFile(output_path, "r") as zip_file:
                    zip_file.extractall(output_path_extracted)
            elif tarfile.is_tarfile(output_path):
                tar_file = tarfile.open(output_path)
                tar_file.extractall(output_path_extracted)
                tar_file.close()
            else:
                raise EnvironmentError("Archive format of {} could not be identified".format(output_path))

        return output_path_extracted

    return output_path
##################################################
# tarfile
##################################################
'''
read tar files
'''
# https://pymotw.com/3/tarfile/index.html
# read/write access to Unix tar archives, including compressed files
import tarfile

for filename in ['README.txt', 'example.tar']:
    try:
        print('{:15} {}'.format(filename, tarfile.is_tarfile(filename)))
    except IOError as err:
        print('{:>15} {}'.format(filename, err))

# read the names of files in an existing archive
import tarfile

with tarfile.open('example.tar', 'r') as t:
    print(t.getnames())

# metadata about the archive members
import tarfile
import time

with tarfile.open('example.tar', 'r') as t:
    # the original snippet breaks off here; a minimal completion in the
    # style of the PyMOTW example it follows:
    for member_info in t.getmembers():
        print(member_info.name)
        print('  Modified:', time.ctime(member_info.mtime))
        print('  Size    :', member_info.size, 'bytes')
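# reading the contents of one member without unpacking the whole archive
# (a short addition in the same style; assumes 'README.txt' is a member
# of example.tar, as in the is_tarfile probe above)
import tarfile

with tarfile.open('example.tar', 'r') as t:
    f = t.extractfile('README.txt')
    if f is not None:
        print(f.read().decode('utf-8'))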
def can_decompress(file_path):
    return tarfile.is_tarfile(file_path) or zipfile.is_zipfile(file_path)
def _safe_is_tarfile(path):
    """safe version of is_tarfile, return False on IOError"""
    try:
        return tarfile.is_tarfile(path)
    except IOError:
        return False
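# Hedged example of the difference: for a missing path, tarfile.is_tarfile()
# raises, while the safe wrapper just returns False.
print(_safe_is_tarfile('/no/such/file.tar'))  # False, no exception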
def update(self):
    """
    Downloads the latest source tarball from github and installs it over
    the existing version.
    """
    base_url = furl(self.repositoryBase)
    base_url.path.add(self.repository)
    base_url.path.add("tarball")
    base_url.path.add(self.branch)
    tar_download_url = base_url.url
    main_dir = os.path.dirname(os.path.dirname(__file__))

    try:
        self.backup()

        # prepare the update dir
        update_dir = os.path.join(main_dir, 'update')
        if os.path.isdir(update_dir):
            logger.info("Clearing out update folder " + update_dir + " before extracting")
            shutil.rmtree(update_dir)

        logger.info("Creating update folder " + update_dir + " before extracting")
        os.makedirs(update_dir)

        # retrieve file
        logger.info("Downloading update from " + repr(tar_download_url))
        tar_download_path = os.path.join(update_dir, 'sb-update.tar')
        urllib.urlretrieve(tar_download_url, tar_download_path)

        if not os.path.isfile(tar_download_path):
            logger.error("Unable to retrieve new version from " + tar_download_url + ", can't update")
            return False

        if not tarfile.is_tarfile(tar_download_path):
            logger.error("Retrieved version from " + tar_download_url + " is corrupt, can't update")
            return False

        # extract to sb-update dir
        logger.info("Extracting update file " + tar_download_path)
        tar = tarfile.open(tar_download_path)
        tar.extractall(update_dir)
        tar.close()

        # delete .tar.gz
        logger.info("Deleting update file " + tar_download_path)
        os.remove(tar_download_path)

        # find update dir name
        update_dir_contents = [x for x in os.listdir(update_dir)
                               if os.path.isdir(os.path.join(update_dir, x))]
        if len(update_dir_contents) != 1:
            logger.error("Invalid update data, update failed: " + str(update_dir_contents))
            return False
        content_dir = os.path.join(update_dir, update_dir_contents[0])

        dontUpdateThese = []  # ("msvcm90.dll", "msvcr90.dll", "msvcm90.dll")

        # rename exes, pyd and dll files so they can be overwritten
        filesToRename = []
        for filename in os.listdir(main_dir):
            if (filename.endswith(".pyd") or filename.endswith(".dll")
                    or filename.endswith(".exe")) and filename not in dontUpdateThese:
                filesToRename.append((filename, filename + ".updated"))
        logger.info("Renaming %d files so they can be overwritten" % len(filesToRename))
        for toRename in filesToRename:
            logger.debug("Renaming %s to %s" % (toRename[0], toRename[1]))
            shutil.move(toRename[0], toRename[1])

        # walk temp folder and move files to main folder
        logger.info("Moving files from " + content_dir + " to " + main_dir)
        for dirname, dirnames, filenames in os.walk(content_dir):
            dirname = dirname[len(content_dir) + 1:]
            for curfile in filenames:
                if curfile not in dontUpdateThese:
                    old_path = os.path.join(content_dir, dirname, curfile)
                    new_path = os.path.join(main_dir, dirname, curfile)
                    logger.debug("Updating %s" % curfile)
                    if os.path.isfile(new_path):
                        os.remove(new_path)
                    os.renames(old_path, new_path)
                else:
                    logger.debug("Skipping %s" % curfile)
    except Exception as e:
        logger.error("Error while trying to update: " + str(e))
        return False

    logger.info("Update successful")
    return True
def __init__(self, config, batch_size, checkpoint_dir_or_path=None,
             var_name_substitutions=None, session_target='', **sample_kwargs):
    if tf.gfile.IsDirectory(checkpoint_dir_or_path):
        checkpoint_path = tf.train.latest_checkpoint(checkpoint_dir_or_path)
    else:
        checkpoint_path = checkpoint_dir_or_path

    self._config = copy.deepcopy(config)
    self._config.hparams.batch_size = batch_size
    with tf.Graph().as_default():
        model = self._config.model
        model.build(self._config.hparams,
                    self._config.data_converter.output_depth,
                    is_training=False)

        # Input placeholders
        self._temperature = tf.placeholder(tf.float32, shape=())

        if self._config.hparams.z_size:
            self._z_input = tf.placeholder(
                tf.float32, shape=[batch_size, self._config.hparams.z_size])
        else:
            self._z_input = None

        if self._config.data_converter.control_depth > 0:
            self._c_input = tf.placeholder(
                tf.float32, shape=[None, self._config.data_converter.control_depth])
        else:
            self._c_input = None

        self._inputs = tf.placeholder(
            tf.float32,
            shape=[batch_size, None, self._config.data_converter.input_depth])
        self._controls = tf.placeholder(
            tf.float32,
            shape=[batch_size, None, self._config.data_converter.control_depth])
        self._inputs_length = tf.placeholder(
            tf.int32,
            shape=[batch_size] + list(self._config.data_converter.length_shape))
        self._max_length = tf.placeholder(tf.int32, shape=())

        # Outputs
        self._outputs, self._decoder_results = model.sample(
            batch_size,
            max_length=self._max_length,
            z=self._z_input,
            c_input=self._c_input,
            temperature=self._temperature,
            **sample_kwargs)
        if self._config.hparams.z_size:
            q_z = model.encode(self._inputs, self._inputs_length, self._controls)
            self._mu = q_z.loc
            self._sigma = q_z.scale.diag
            self._z = q_z.sample()

        var_map = None
        if var_name_substitutions is not None:
            var_map = {}
            for v in tf.global_variables():
                var_name = v.name[:-2]  # Strip ':0' suffix.
                for pattern, substitution in var_name_substitutions:
                    var_name = re.sub(pattern, substitution, var_name)
                if var_name != v.name[:-2]:
                    tf.logging.info('Renaming `%s` to `%s`.', v.name[:-2], var_name)
                var_map[var_name] = v

        # Restore graph
        self._sess = tf.Session(target=session_target)
        saver = tf.train.Saver(var_map)
        if os.path.exists(checkpoint_path) and tarfile.is_tarfile(checkpoint_path):
            tf.logging.info('Unbundling checkpoint.')
            with tempfile.TemporaryDirectory() as temp_dir:
                tar = tarfile.open(checkpoint_path)
                tar.extractall(temp_dir)
                # Assume only a single checkpoint is in the directory.
                for name in tar.getnames():
                    if name.endswith('.index'):
                        checkpoint_path = os.path.join(temp_dir, name[0:-6])
                        break
                saver.restore(self._sess, checkpoint_path)
        else:
            saver.restore(self._sess, checkpoint_path)
def update(self):
    """
    Downloads the latest source tarball from github and installs it over
    the existing version.
    """
    base_url = furl(self.repositoryBase)
    base_url.path.add(self.repository)
    base_url.path.add("tarball")
    base_url.path.add(self.branch)
    tar_download_url = base_url.url
    main_dir = os.path.dirname(os.path.dirname(__file__))

    try:
        self.backup()

        # prepare the update dir
        update_dir = os.path.join(main_dir, 'update')
        if os.path.isdir(update_dir):
            logger.info("Clearing out update folder " + update_dir + " before extracting")
            shutil.rmtree(update_dir)

        logger.info("Creating update folder " + update_dir + " before extracting")
        os.makedirs(update_dir)

        # retrieve file
        logger.info("Downloading update from " + repr(tar_download_url))
        tar_download_path = os.path.join(update_dir, 'sb-update.tar')
        # Apparently SSL causes problems on some systems (#138)
        response = requests.get(tar_download_url, stream=True, verify=False)
        with open(tar_download_path, 'wb') as out_file:
            shutil.copyfileobj(response.raw, out_file)
        del response

        if not os.path.isfile(tar_download_path):
            logger.error("Unable to retrieve new version from " + tar_download_url + ", can't update")
            return False

        if not tarfile.is_tarfile(tar_download_path):
            logger.error("Retrieved version from " + tar_download_url + " is corrupt, can't update")
            return False

        # extract to sb-update dir
        logger.info("Extracting update file " + tar_download_path)
        tar = tarfile.open(tar_download_path)
        tar.extractall(update_dir)
        tar.close()

        # delete .tar.gz
        logger.info("Deleting update file " + tar_download_path)
        os.remove(tar_download_path)

        # find update dir name
        update_dir_contents = [x for x in os.listdir(update_dir)
                               if os.path.isdir(os.path.join(update_dir, x))]
        if len(update_dir_contents) != 1:
            logger.error("Invalid update data, update failed: " + str(update_dir_contents))
            return False
        content_dir = os.path.join(update_dir, update_dir_contents[0])

        # walk temp folder and move files to main folder
        logger.info("Moving files from " + content_dir + " to " + main_dir)
        for dirname, dirnames, filenames in os.walk(content_dir):
            dirname = dirname[len(content_dir) + 1:]
            for curfile in filenames:
                old_path = os.path.join(content_dir, dirname, curfile)
                new_path = os.path.join(main_dir, dirname, curfile)
                if os.path.isfile(new_path):
                    os.remove(new_path)
                os.renames(old_path, new_path)
    except Exception as e:
        logger.error("Error while trying to update: " + str(e))
        return False

    logger.info("Update successful")
    return True
def install(src, dest):
    """Install a zip, exe, tar.gz, tar.bz2 or dmg file, and return the path
    of the installation folder.

    :param src: Path to the install file
    :param dest: Path to install to (to ensure we do not overwrite any
        existent files the folder should not exist yet)
    """
    if not is_installer(src):
        msg = "{} is not a valid installer file".format(src)
        if '://' in src:
            try:
                return _install_url(src, dest)
            except Exception:
                exc, val, tb = sys.exc_info()
                msg = "{} ({})".format(msg, val)
                reraise(InvalidSource, msg, tb)
        raise InvalidSource(msg)

    src = os.path.realpath(src)
    dest = os.path.realpath(dest)

    did_we_create = False
    if not os.path.exists(dest):
        did_we_create = True
        os.makedirs(dest)

    trbk = None
    try:
        install_dir = None
        if src.lower().endswith('.dmg'):
            install_dir = _install_dmg(src, dest)
        elif src.lower().endswith('.exe'):
            install_dir = _install_exe(src, dest)
        elif zipfile.is_zipfile(src) or tarfile.is_tarfile(src):
            install_dir = mozfile.extract(src, dest)[0]

        return install_dir

    except BaseException:
        cls, exc, trbk = sys.exc_info()
        if did_we_create:
            try:
                # try to uninstall this properly
                uninstall(dest)
            except Exception:
                # uninstall may fail, let's just try to clean the folder
                # in this case
                try:
                    mozfile.remove(dest)
                except Exception:
                    pass
        if issubclass(cls, Exception):
            error = InstallError('Failed to install "%s (%s)"' % (src, str(exc)))
            reraise(InstallError, error, trbk)
        # any other kind of exception like KeyboardInterrupt is just re-raised.
        reraise(cls, exc, trbk)

    finally:
        # trbk won't get GC'ed due to circular reference
        # http://docs.python.org/library/sys.html#sys.exc_info
        del trbk
def is_tarfile(self):
    try:
        return tarfile.is_tarfile(self.archive_path)
    except Exception:
        return False
def populate(self):
    logger.info("Preprocessing dataset")

    was_extracted = False
    if len(self.filenames) > 0:
        file_path = os.path.join(self.save_path, self.filenames[0])
        if not os.path.exists(file_path[:-7]):  # nothing extracted yet
            if tarfile.is_tarfile(file_path):
                logger.info("Extracting tar file")
                tar = tarfile.open(file_path, "r:gz")
                tar.extractall(path=self.save_path)
                was_extracted = True
                tar.close()

    # get exact path of the extract, for robustness to changes in the 10X storage logic
    path_to_data, suffix = self.find_path_to_data()

    # get filenames, according to 10X storage logic
    measurements_filename = "genes.tsv" if suffix == "" else "features.tsv.gz"
    barcode_filename = "barcodes.tsv" + suffix

    matrix_filename = "matrix.mtx" + suffix
    expression_data = sp_io.mmread(os.path.join(path_to_data, matrix_filename)).T
    if self.dense:
        expression_data = expression_data.A
    else:
        expression_data = csr_matrix(expression_data)

    # group measurements by type (e.g gene, protein)
    # in case there are multiple measurements, e.g protein
    # they are indicated in the third column
    gene_expression_data = expression_data
    measurements_info = pd.read_csv(
        os.path.join(path_to_data, measurements_filename), sep="\t", header=None
    )
    Ys = None
    if measurements_info.shape[1] < 3:
        gene_names = measurements_info[self.measurement_names_column].astype(np.str)
    else:
        gene_names = None
        for measurement_type in np.unique(measurements_info[2]):
            # .values required to work with sparse matrices
            measurement_mask = (measurements_info[2] == measurement_type).values
            measurement_data = expression_data[:, measurement_mask]
            measurement_names = measurements_info[self.measurement_names_column][
                measurement_mask
            ].astype(np.str)
            if measurement_type == "Gene Expression":
                gene_expression_data = measurement_data
                gene_names = measurement_names
            else:
                Ys = [] if Ys is None else Ys
                if measurement_type == "Antibody Capture":
                    measurement_type = "protein_expression"
                    columns_attr_name = "protein_names"
                    # protein counts do not have many zeros so always make dense
                    if self.dense is not True:
                        measurement_data = measurement_data.A
                else:
                    measurement_type = measurement_type.lower().replace(" ", "_")
                    columns_attr_name = measurement_type + "_names"
                measurement = CellMeasurement(
                    name=measurement_type,
                    data=measurement_data,
                    columns_attr_name=columns_attr_name,
                    columns=measurement_names,
                )
                Ys.append(measurement)
        if gene_names is None:
            raise ValueError(
                "When loading measurements, no 'Gene Expression' category was found."
            )

    batch_indices, cell_attributes_dict = None, None
    if os.path.exists(os.path.join(path_to_data, barcode_filename)):
        barcodes = pd.read_csv(
            os.path.join(path_to_data, barcode_filename), sep="\t", header=None
        )
        cell_attributes_dict = {
            "barcodes": np.squeeze(np.asarray(barcodes, dtype=str))
        }
        # As of 07/01, 10X barcodes have format "%s-%d" where the digit is a batch index starting at 1
        batch_indices = np.asarray(
            [barcode.split("-")[-1] for barcode in cell_attributes_dict["barcodes"]]
        )
        batch_indices = batch_indices.astype(np.int64) - 1

    logger.info("Finished preprocessing dataset")

    self.populate_from_data(
        X=gene_expression_data,
        batch_indices=batch_indices,
        gene_names=gene_names,
        cell_attributes_dict=cell_attributes_dict,
        Ys=Ys,
    )
    self.filter_cells_by_count()

    # cleanup if required
    if was_extracted and self.remove_extracted_data:
        logger.info("Removing extracted data at {}".format(file_path[:-7]))
        shutil.rmtree(file_path[:-7])
def evidencetype(self, evidence, sspath): # take initial hash of file/dir Tools().inithash(self.case_name, self.secure_store_location, self.evidence) # split filename from path set as method variable filename = evidence.split("/")[-1] # check if file or dir if os.path.isfile(evidence): # copy file to securestore os.system(f"sudo cp {evidence} {sspath}/image") print("copying to image folder in securestore...") imagefile = f"{sspath}/image/{filename}" # check file type of evidence if not tarfile.is_tarfile(evidence) and not zipfile.is_zipfile( evidence): # copy to logical to work with file os.system( f"sudo cp {sspath}/image/{filename} {sspath}/logical") logfile = f"{sspath}/logical/{filename}" # carve out file type from linux "file" query file_out = subprocess.check_output(["file", logfile]).decode("utf-8") ftype = file_out.split(":")[1].split(",")[0].split()[0].upper() # open file_type csv in config_files to find filetype file = open( os.path.dirname(os.path.abspath("file_type.csv")) + "/config_files/file_type.csv", "r") for line in file: ft = line.split(",")[0] filedescription = line.split(",")[1] if ft.upper() == ftype.upper(): print("Looks like you're working with a " + filename + ": " + filedescription) break elif (ft.upper() in ftype.upper() ) and not (ft.upper() == ftype.upper()): print("Possible match to filetype is: " + filename + ": " + filedescription) break elif filetype.guess(logfile) is not None: print('File extension: %s' % filetype.guess(logfile).extension) print('File MIME type: %s' % filetype.guess(logfile).mime) break else: print( "This file type is unknown, Autofor will still try to examine it" ) break # hash for files Tools().hash(self.case_name, f"{sspath}/logical/{filename}", self.secure_store_location) # check if disk image file if "DOS/MBR" in ftype: print( f"Looks like {filename} is a disk image, Autofor will mount it for you..." ) fsplit = file_out.split(" ") if "startsector" in fsplit: offset = int( fsplit[(fsplit.index("startsector") + 1)][:-1]) * 512 # exceptions for creating the directory if it already exists don't seem to work else: offset = 0 # create mount location name capped at 10 characters to keep things tidy fname = logfile.split("/")[-1][:10] try: print("Making mount directory...") os.system(f"sudo mkdir /mnt/{fname}") except FileExistsError: print( "Looks like this mount point already exists... Mounting..." ) # mount disk image (exceptions are not handled here)
os.system( f"sudo mount -o ro,loop,offset={offset} {logfile} /mnt/{fname}" ) self.mount_point = f"/mnt/{fname}" # update config json self.configupdate() # success print("Mount Complete!") # hash all files in mount point Tools().hash(self.case_name, self.mount_point, self.secure_store_location) # handling compressed images elif tarfile.is_tarfile(imagefile): print(f"Looks like {filename} is a tarfile") print(f"extracting to {sspath}/logical") # print(imagefile) f = tarfile.open(imagefile) f.extractall(path=f'{sspath}/logical') f.close() print("Extraction complete") Tools().hash(self.case_name, f"{sspath}/logical/{filename}", self.secure_store_location) elif zipfile.is_zipfile(imagefile): print(f"Looks like {filename} is a zipfile") print(f"extracting to {sspath}/logical") f = zipfile.ZipFile(imagefile) f.extractall(path=f'{sspath}/logical') print("Extraction complete") f.close() Tools().hash(self.case_name, f"{sspath}/logical", self.secure_store_location) elif os.path.isdir(evidence): # zip directory to image folder in securestore print(f"{filename} is a directory") print(f"{filename} will be compressed to {sspath}/image") print( f"{filename} will then be extracted to {sspath}/logical/{filename} for analysis" ) """shutil makes archiving dirs easier: no path-confusion issues from using zipfile and having to iterate through every file and subdir in the root dir""" shutil.make_archive(f"{sspath}/image/{filename}", 'zip', evidence) # extract from image to logical with zipfile.ZipFile( f'{sspath}/image/{filename.split("/")[-1]}.zip', 'r') as zip: # make directory in logical in case archives are in the root of the archive os.system(f"mkdir {sspath}/logical/{filename}") zip.extractall(f"{sspath}/logical/{filename}") # Take second hash of logical output for dir Tools().hash(self.case_name, f"{sspath}/logical/{filename}", self.secure_store_location)
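# Added example: the byte-offset computation used before mounting above. "file"
# reports a partition's start sector; mount's offset= option expects bytes, so
# the sector index is multiplied by the conventional 512-byte sector size.
# The helper name and the trailing-comma strip are illustrative assumptions.
def mount_offset_bytes(file_output):
    parts = file_output.split(" ")
    if "startsector" in parts:
        return int(parts[parts.index("startsector") + 1].rstrip(",")) * 512
    return 0

# e.g. mount_offset_bytes("... startsector 2048, 1024000 sectors") == 2048 * 512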
def filetype(filename): """Try to guess the type of the file.""" if os.path.isdir(filename): # Potentially a BundleTrajectory if BundleTrajectory.is_bundle(filename): return 'bundle' else: raise IOError('Directory: ' + filename) fileobj = open(filename) s3 = fileobj.read(3) if len(s3) == 0: raise IOError('Empty file: ' + filename) if filename.lower().endswith('.db') or filename.lower().endswith('.cmr'): return 'db' if is_tarfile(filename): return 'gpw' if s3 == 'CDF': from ase.io.pupynere import NetCDFFile nc = NetCDFFile(filename) if 'number_of_dynamic_atoms' in nc.dimensions: return 'dacapo' history = nc.history if history == 'GPAW restart file': return 'gpw-nc' if history == 'ASE trajectory': return 'nc' if history == 'Dacapo': return 'dacapo' if hasattr(nc, 'file_format') and nc.file_format.startswith('ETSF'): return 'etsf' raise IOError('Unknown netCDF file!') if is_zipfile(filename): return 'vnl' fileobj.seek(0) lines = fileobj.readlines(1000) if lines[0].startswith('PickleTrajectory'): return 'traj' if lines[1].startswith('OUTER LOOP:') or filename.lower().endswith( '.cube'): return 'cube' if ' ___ ___ ___ _ _ _ \n' in lines: return 'gpaw-text' if (' &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n' in lines[:90]): return 'dacapo-text' for line in lines: if line[0] != '#': word = line.strip() if word in ['ANIMSTEPS', 'CRYSTAL', 'SLAB', 'POLYMER', 'MOLECULE']: return 'xsf' filename_v = os.path.basename(filename) if 'POSCAR' in filename_v or 'CONTCAR' in filename_v: return 'vasp' if 'OUTCAR' in filename_v: return 'vasp_out' if filename.lower().endswith('.exi'): return 'exi' if filename.lower().endswith('.mol'): return 'mol' if filename.lower().endswith('.pdb'): return 'pdb' if filename.lower().endswith('.cif'): return 'cif' if filename.lower().endswith('.struct'): return 'struct' if filename.lower().endswith('.struct_out'): return 'struct_out' for line in lines: if 'Invoking FHI-aims ...' in line: return 'aims_out' if 'atom' in line: data = line.split() try: a = Atoms(symbols=[data[4]], positions=[[ float(data[1]), float(data[2]), float(data[3]) ]]) return 'aims' except: pass if filename.lower().endswith('.in'): return 'aims' if filename.lower().endswith('.cfg'): return 'cfg' if os.path.split(filename)[1] == 'atoms.dat': return 'iwm' if filename.endswith('I_info'): return 'Cmdft' if lines[0].startswith('$coord') or os.path.basename(filename) == 'coord': return 'tmol' if lines[0].startswith('$grad') or os.path.basename( filename) == 'gradient': return 'tmol-gradient' if lines[0].startswith('Geometry'): return 'dftb' if filename.lower().endswith('.geom'): return 'castep_geom' if filename.lower().endswith('.castep'): return 'castep' if filename.lower().endswith('.cell'): return 'castep_cell' if s3 == '<?x': from ase.io.vtkxml import probe_vtkxml xmltype = probe_vtkxml(filename) if xmltype == 'ImageData': return 'vti' elif xmltype == 'StructuredGrid': return 'vts' elif xmltype == 'UnstructuredGrid': return 'vtu' elif xmltype is not None: raise IOError('Unknown VTK XML file!') if filename.lower().endswith('.sdf'): return 'sdf' if filename.lower().endswith('.gen'): return 'gen' if 'ITEM: TIMESTEP\n' in lines: return 'lammps' return 'xyz'
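# Added example: the magic-byte sniffing that filetype() opens the file for.
# Reading in binary mode avoids decoding problems on Python 3 (the function
# above predates the strict str/bytes split). Return labels are illustrative.
def sniff_magic(filename):
    with open(filename, 'rb') as f:
        head = f.read(3)
    if head == b'CDF':
        return 'netcdf'  # classic NetCDF header, as checked above
    if head == b'<?x':
        return 'xml'     # start of an XML declaration, as checked above
    return None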
def configure_sopcast(latest_version): # Configuration for LINUX if xbmc.getCondVisibility( 'system.platform.linux' ) and not xbmc.getCondVisibility('system.platform.Android'): print("Detected OS: Linux") # Linux Armv if "arm" in os.uname()[4]: print("Sopcast Configuration - LINUX ARM") if settings.getSetting('rpi2') == "true": print("Raspberry PI 2") SPSC_KIT = os.path.join(addonpath, sopcast_raspberry.split("/")[-1]) download_tools().Downloader(sopcast_raspberry, SPSC_KIT, translate(30076), translate(30000)) if tarfile.is_tarfile(SPSC_KIT): path_libraries = os.path.join(pastaperfil, "sopcast") download_tools().extract(SPSC_KIT, path_libraries) xbmc.sleep(500) download_tools().remove(SPSC_KIT) if latest_version: settings.setSetting('sopcast_version', value=latest_version) return elif os.uname()[4] == "x86_64": generic = False if settings.getSetting('openelecx86_64') == "true": print("Detected OpenELEC x86_64") SPSC_KIT = os.path.join(addonpath, openelecx86_64_sopcast.split("/")[-1]) download_tools().Downloader(openelecx86_64_sopcast, SPSC_KIT, translate(30076), translate(30000)) if tarfile.is_tarfile(SPSC_KIT): download_tools().extract(SPSC_KIT, pastaperfil) xbmc.sleep(500) download_tools().remove(SPSC_KIT) if latest_version: settings.setSetting('sopcast_version', value=latest_version) return else: generic = True elif os.uname()[4] == "i386" or os.uname()[4] == "i686": generic = False if settings.getSetting('openeleci386') == "true": SPSC_KIT = os.path.join(addonpath, openelecxi386_sopcast.split("/")[-1]) download_tools().Downloader(openelecxi386_sopcast, SPSC_KIT, translate(30076), translate(30000)) if tarfile.is_tarfile(SPSC_KIT): download_tools().extract(SPSC_KIT, pastaperfil) xbmc.sleep(500) download_tools().remove(SPSC_KIT) if latest_version: settings.setSetting('sopcast_version', value=latest_version) return else: generic = True if generic: SPSC_KIT = os.path.join(addonpath, sopcast_linux_generico.split("/")[-1]) download_tools().Downloader(sopcast_linux_generico, SPSC_KIT, translate(30076), translate(30000)) if tarfile.is_tarfile(SPSC_KIT): path_libraries = os.path.join(pastaperfil, "sopcast") download_tools().extract(SPSC_KIT, path_libraries) xbmc.sleep(500) download_tools().remove(SPSC_KIT) # set every single file from the bundle as executable path_libraries = os.path.join(pastaperfil, "sopcast") dirs, files = xbmcvfs.listdir(path_libraries) for ficheiro in files: binary_path = os.path.join(path_libraries, ficheiro) st = os.stat(binary_path) import stat os.chmod(binary_path, st.st_mode | stat.S_IEXEC) path_libraries = os.path.join(path_libraries, "lib") dirs, files = xbmcvfs.listdir(path_libraries) for ficheiro in files: binary_path = os.path.join(path_libraries, ficheiro) st = os.stat(binary_path) import stat os.chmod(binary_path, st.st_mode | stat.S_IEXEC) if latest_version: settings.setSetting('sopcast_version', value=latest_version) return elif xbmc.getCondVisibility('system.platform.windows'): print("Detected OS: Windows") if not xbmcvfs.exists(pastaperfil): xbmcvfs.mkdir(pastaperfil) # Sop import ctypes is_admin = ctypes.windll.shell32.IsUserAnAdmin() != 0 if not is_admin: mensagemok(translate(30000), translate(30077), translate(30078)) else: cmd = ['sc', 'delete', 'sopcastp2p'] proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True) for line in proc.stdout: print("cmd out: " + line.rstrip()) xbmc.sleep(1000) ret = mensagemprogresso.create(translate(30000), translate(30000)) mensagemprogresso.update(0, translate(30117), " ") xbmc.sleep(1000) import _winreg aReg = 
_winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE) try: aKey = _winreg.OpenKey(aReg, r'SOFTWARE\SopCast\Player\InstallPath', 0, _winreg.KEY_READ) name, value, type = _winreg.EnumValue(aKey, 0) sopcast_executable = value print("Installation executable of sopcast was found: " + sopcast_executable) _winreg.CloseKey(aKey) mensagemprogresso.update(10, translate(30079), translate(30080)) except: sopcast_executable = "" mensagemok(translate(30000), translate(30081), translate(30082)) if not sopcast_executable: pass else: xbmc.sleep(1000) mensagemprogresso.update(20, translate(30083), " ") xbmc.sleep(1000) print("Getting windows users IDS") aReg = _winreg.ConnectRegistry(None, _winreg.HKEY_LOCAL_MACHINE) aKey = _winreg.OpenKey( aReg, r'SOFTWARE\Microsoft\Windows NT\CurrentVersion\ProfileList' ) users = [] for i in range(1024): try: asubkey = _winreg.EnumKey(aKey, i) print(asubkey) aKeydois = _winreg.OpenKey( aReg, os.path.join( 'SOFTWARE\Microsoft\Windows NT\CurrentVersion\ProfileList', asubkey)) val = _winreg.QueryValueEx(aKeydois, "ProfileImagePath") try: print(val[0]) except: print( "Notice: User with strange characters, print cmd ignored." ) if "Windows" in val[0] or "%systemroot%" in val[0]: pass else: users.append(asubkey) except: pass if not users: mensagemok(translate(30000), translate(30084)) else: mensagemprogresso.update(30, translate(30085), translate(30080)) xbmc.sleep(200) mensagemprogresso.update(30, translate(30086), " ") xbmc.sleep(1000) print("System Users", users) srvany_final_location = os.path.join( sopcast_executable.replace("SopCast.exe", ""), "srvany.exe") srvany_download_location = os.path.join( addonpath, "srvany.exe") srvanytgz_download_location = os.path.join( addonpath, "srvany.tar.gz") download_tools().Downloader(srvany_executable, srvanytgz_download_location, translate(30087), translate(30000)) xbmc.sleep(1000) if tarfile.is_tarfile(srvanytgz_download_location): path_libraries = addonpath download_tools().extract(srvanytgz_download_location, path_libraries) xbmcvfs.copy(srvany_download_location, srvany_final_location) download_tools().remove(srvanytgz_download_location) download_tools().remove(srvany_download_location) xbmc.sleep(1000) ret = mensagemprogresso.create(translate(30000), translate(30000)) xbmc.sleep(200) mensagemprogresso.update(35, translate(30088), " ") xbmc.sleep(1000) cmd = [ 'sc', 'create', 'sopcastp2p', 'binpath=', os.path.join( os.path.join( sopcast_executable.replace("SopCast.exe", "")), 'srvany.exe') ] proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True) servicecreator = False for line in proc.stdout: print("cmd out: " + line.rstrip()) servicecreator = True if not servicecreator: mensagemok(translate(30000), translate(30089)) else: mensagemprogresso.update(40, translate(30088), translate(30080)) xbmc.sleep(1000) mensagemprogresso.update(45, translate(30090), " ") xbmc.sleep(1000) print("Trying to modify regedit....") try: aReg = _winreg.ConnectRegistry( None, _winreg.HKEY_LOCAL_MACHINE) key = _winreg.CreateKey( aReg, r'SYSTEM\CurrentControlSet\Services\sopcastp2p\Parameters' ) _winreg.SetValueEx( key, 'AppDirectory', 0, _winreg.REG_SZ, os.path.join( sopcast_executable.replace( "SopCast.exe", ""))) _winreg.SetValueEx( key, 'Application', 0, _winreg.REG_SZ, os.path.join( os.path.join( sopcast_executable.replace( "SopCast.exe", "")), "SopCast.exe")) _winreg.SetValueEx(key, 'AppParameters', 0, _winreg.REG_SZ, "sop://") mensagemprogresso.update(50, translate(30090), translate(30080)) regedit = True except: 
mensagemok(translate(30000), translate(30091)) regedit = False if not regedit: pass else: xbmc.sleep(1000) mensagemprogresso.update(50, translate(30092), " ") cmd = ['sc', 'sdshow', 'sopcastp2p'] proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True) lines = [] for line in proc.stdout: print(line.rstrip()) if line.rstrip() != "" and "(" in line.rstrip( ): lines.append(line.rstrip()) else: pass if len(lines) != 1: mensagemok(translate(30000), translate(30093)) else: linha_arr = [] for user in users: linha_arr.append('(A;;RPWPCR;;;' + user + ')') linha_add = '' for linha in linha_arr: linha_add += linha print("line piece to add: " + linha_add) linha_final = lines[0].replace( "S:(", linha_add + "S:(") print("Final line: " + linha_final) permissions = False xbmc.sleep(500) mensagemprogresso.update( 60, translate(30092), translate(30080)) xbmc.sleep(500) mensagemprogresso.update( 60, translate(30094), " ") cmd = [ 'sc', 'sdset', 'sopcastp2p', linha_final ] proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True) for line in proc.stdout: print(line.rstrip()) permissions = True if not permissions: mensagemok(translate(30000), translate(30095)) else: mensagemprogresso.update( 70, translate(30094), translate(30080)) xbmc.sleep(1000) mensagemprogresso.update( 70, translate(30096), " ") print( "Trying to set sopcastp2p service regedit permissions..." ) download_tools().Downloader( srvany_permissions, os.path.join( pastaperfil, "sopcastp2p-permissions.txt"), translate(30097), translate(30000)) xbmc.sleep(500) ret = mensagemprogresso.create( translate(30000), translate(30000)) xbmc.sleep(500) mensagemprogresso.update( 80, translate(30098), " ") xbmc.sleep(1000) cmd = [ 'regini', os.path.join( pastaperfil, "sopcastp2p-permissions.txt") ] proc = subprocess.Popen( cmd, stdout=subprocess.PIPE, shell=True) for line in proc.stdout: print(line.rstrip()) mensagemprogresso.update( 90, translate(30098), translate(30098)) mensagemprogresso.update( 100, translate(30099), " ") xbmc.sleep(2000) mensagemprogresso.close() if latest_version: settings.setSetting( 'sopcast_version', value=latest_version) return elif xbmc.getCondVisibility('System.Platform.OSX'): print("Detected OS: Mac OSX") available = False if os.uname()[-1] == "x86_64": mac_package = osx_x64_sopcast available = True elif os.uname()[-1] == "i386": mac_package = osx_i386_sopcast available = True else: available = False if available: if not os.path.exists(pastaperfil): xbmcvfs.mkdir(pastaperfil) MAC_KIT = os.path.join(addonpath, mac_package.split("/")[-1]) download_tools().Downloader(mac_package, MAC_KIT, translate(30076), translate(30000)) if tarfile.is_tarfile(MAC_KIT): path_libraries = os.path.join(pastaperfil) download_tools().extract(MAC_KIT, pastaperfil) download_tools().remove(MAC_KIT) sp_sc_auth = os.path.join(pastaperfil, "sopcast", "sp-sc-auth") st = os.stat(sp_sc_auth) import stat os.chmod(sp_sc_auth, st.st_mode | stat.S_IEXEC) if latest_version: settings.setSetting('sopcast_version', value=latest_version) return else: mensagemok(translate(30000), translate(30100)) return elif xbmc.getCondVisibility('System.Platform.Android'): print("Detected OS: Android") # Sopcast configuration print("Starting SopCast Configuration") # Moving sopclient to ext4 hack - tks steeve from xbmctorrent sopclient_builtin_location = os.path.join(addonpath, "resources", "binaries", "sopclient") # Hack to get current xbmc app id xbmcfolder = xbmc.translatePath(addonpath).split("/") found = False if settings.getSetting('auto_appid') == 'true': i = 0 
sopcast_installed = False for folder in xbmcfolder: if folder.count('.') >= 2 and folder != addon_id: found = True break else: i += 1 if found: uid = os.getuid() app_id = xbmcfolder[i] else: if settings.getSetting('custom_appid') != '': uid = os.getuid() app_id = settings.getSetting('custom_appid') found = True if found: xbmc_data_path = os.path.join("/data", "data", app_id) if os.path.exists(xbmc_data_path) and uid == os.stat( xbmc_data_path).st_uid: android_binary_dir = os.path.join(xbmc_data_path, "files", "program.plexus") if not os.path.exists(android_binary_dir): os.makedirs(android_binary_dir) android_binary_path = os.path.join(android_binary_dir, "sopclient") if not os.path.exists(android_binary_path) or os.path.getsize( android_binary_path) != os.path.getsize( sopclient_builtin_location): shutil.copy2(sopclient_builtin_location, android_binary_path) binary_path = android_binary_path st = os.stat(binary_path) import stat os.chmod(binary_path, st.st_mode | stat.S_IEXEC) settings.setSetting('android_sopclient', value=binary_path) opcao = xbmcgui.Dialog().yesno(translate(30000), translate(30101), translate(30103)) if not opcao: settings.setSetting('external-sopcast', value='0') sopcast_installed = True mensagemok(translate(30000), translate(30099)) else: mensagemok(translate(30000), translate(30104)) if os.path.exists(os.path.join("sdcard", "Download")): pasta = os.path.join("sdcard", "Download") sopfile = os.path.join("sdcard", "Download", sopcast_apk.split("/")[-1]) else: dialog = xbmcgui.Dialog() pasta = dialog.browse(int(0), translate(30105), 'videos') sopfile = os.path.join(pasta, sopcast_apk.split("/")[-1]) download_tools().Downloader(sopcast_apk, sopfile, translate(30106), translate(30000)) if tarfile.is_tarfile(sopfile): download_tools().extract(sopfile, pasta) download_tools().remove(sopfile) mensagemok(translate(30000), translate(30107), pasta, translate(30108)) sopcast_installed = True settings.setSetting('external-sopcast', value='1') mensagemok(translate(30000), translate(30099)) if latest_version: settings.setSetting('sopcast_version', value=latest_version) return else: mensagemok(translate(30000), translate(30109)) return
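# Added example: the chmod pattern repeated throughout configure_sopcast(),
# factored into a helper (hypothetical name). OR-ing stat.S_IEXEC into the
# existing mode adds the owner-execute bit without clobbering the other bits.
import os
import stat

def make_executable(path):
    st = os.stat(path)
    os.chmod(path, st.st_mode | stat.S_IEXEC)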
def maybe_download_and_extract(filename, working_directory, url_source, extract=False, expected_bytes=None): """Checks if the file exists in working_directory, otherwise tries to download it, and optionally also tries to extract it if the format is ".zip" or ".tar" Parameters ---------- filename : string The name of the (to be) downloaded file. working_directory : string A folder path to search for the file in and download the file to url_source : string The URL to download the file from extract : bool, defaults to False If True, tries to uncompress the downloaded file if it is a ".tar.gz/.tar.bz2" or ".zip" file expected_bytes : int/None If set, tries to verify that the downloaded file is of the specified size, otherwise raises an Exception; defaults to None, which corresponds to no check being performed Returns ---------- filepath to downloaded (uncompressed) file Examples -------- >>> down_file = tl.files.maybe_download_and_extract(filename = 'train-images-idx3-ubyte.gz', working_directory = 'data/', url_source = 'http://yann.lecun.com/exdb/mnist/') >>> tl.files.maybe_download_and_extract(filename = 'ADEChallengeData2016.zip', working_directory = 'data/', url_source = 'http://sceneparsing.csail.mit.edu/data/', extract=True) """ # We first define a download function, supporting both Python 2 and 3. def _download(filename, working_directory, url_source): def _dlProgress(count, blockSize, totalSize): if(totalSize != 0): percent = float(count * blockSize) / float(totalSize) * 100.0 sys.stdout.write("\r" "Downloading " + filename + "...%d%%" % percent) sys.stdout.flush() if sys.version_info[0] == 2: from urllib import urlretrieve else: from urllib.request import urlretrieve filepath = os.path.join(working_directory, filename) urlretrieve(url_source+filename, filepath, reporthook=_dlProgress) exists_or_mkdir(working_directory, verbose=False) filepath = os.path.join(working_directory, filename) if not os.path.exists(filepath): _download(filename, working_directory, url_source) print() statinfo = os.stat(filepath) print('Successfully downloaded', filename, statinfo.st_size, 'bytes.') if(not(expected_bytes is None) and (expected_bytes != statinfo.st_size)): raise Exception('Failed to verify ' + filename + '. Can you get to it with a browser?') if(extract): if tarfile.is_tarfile(filepath): print('Trying to extract tar file') with tarfile.open(filepath, 'r') as archive: archive.extractall(working_directory) print('... Success!') elif zipfile.is_zipfile(filepath): print('Trying to extract zip file') with zipfile.ZipFile(filepath) as zf: zf.extractall(working_directory) print('... Success!') else: print("Unknown compression format; only .tar.gz/.tar.bz2/.tar and .zip are supported") return filepath
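# Added example: the reporthook protocol that _dlProgress() implements above.
# urlretrieve calls the hook as hook(block_count, block_size, total_size); a
# non-positive total_size means the server did not report a usable Content-Length.
import sys

def progress_hook(count, block_size, total_size):
    if total_size > 0:
        percent = min(100.0, count * block_size * 100.0 / total_size)
        sys.stdout.write("\rDownloading...%d%%" % percent)
        sys.stdout.flush()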
def cached_path( url_or_filename, download_config=None, **download_kwargs, ) -> Optional[str]: """ Given something that might be a URL (or might be a local path), determine which. If it's a URL, download the file and cache it, and return the path to the cached file. If it's already a local path, make sure the file exists and then return the path. Return: Local path (string) Raises: FileNotFoundError: in case of non-recoverable file (non-existent or no cache on disk) ConnectionError: in case of unreachable url and no cache on disk ValueError: if it couldn't parse the url or filename correctly """ if download_config is None: download_config = DownloadConfig(**download_kwargs) cache_dir = download_config.cache_dir or HF_DATASETS_CACHE if isinstance(cache_dir, Path): cache_dir = str(cache_dir) if isinstance(url_or_filename, Path): url_or_filename = str(url_or_filename) if is_remote_url(url_or_filename): # URL, so get it from the cache (downloading if necessary) output_path = get_from_cache( url_or_filename, cache_dir=cache_dir, force_download=download_config.force_download, proxies=download_config.proxies, resume_download=download_config.resume_download, user_agent=download_config.user_agent, local_files_only=download_config.local_files_only, use_etag=download_config.use_etag, ) elif os.path.exists(url_or_filename): # File, and it exists. output_path = url_or_filename elif urlparse(url_or_filename).scheme == "": # File, but it doesn't exist. raise FileNotFoundError( "Local file {} doesn't exist".format(url_or_filename)) else: # Something unknown raise ValueError( "unable to parse {} as a URL or as a local path".format( url_or_filename)) if download_config.extract_compressed_file and output_path is not None: if not is_zipfile(output_path) and not tarfile.is_tarfile( output_path) and not is_gzip(output_path): return output_path # Path where we extract compressed archives # We extract in the cache dir, and get the extracted path name by hashing the original path" abs_output_path = os.path.abspath(output_path) output_path_extracted = os.path.join( cache_dir, "extracted", hash_url_to_filename(abs_output_path)) if (os.path.isdir(output_path_extracted) and os.listdir(output_path_extracted) and not download_config.force_extract) or ( os.path.isfile(output_path_extracted) and not download_config.force_extract): return output_path_extracted # Prevent parallel extractions lock_path = output_path + ".lock" with FileLock(lock_path): shutil.rmtree(output_path_extracted, ignore_errors=True) os.makedirs(output_path_extracted, exist_ok=True) if tarfile.is_tarfile(output_path): tar_file = tarfile.open(output_path) tar_file.extractall(output_path_extracted) tar_file.close() elif is_gzip(output_path): os.rmdir(output_path_extracted) with gzip.open(output_path, "rb") as gzip_file: with open(output_path_extracted, "wb") as extracted_file: shutil.copyfileobj(gzip_file, extracted_file) elif is_zipfile( output_path ): # put zip file to the last, b/c it is possible wrongly detected as zip with ZipFile(output_path, "r") as zip_file: zip_file.extractall(output_path_extracted) zip_file.close() else: raise EnvironmentError( "Archive format of {} could not be identified".format( output_path)) return output_path_extracted return output_path
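# Added example: a condensed sketch of the lock-guarded extraction in
# cached_path(), using the same FileLock dependency. The lock file sits next to
# the archive so concurrent processes serialize on the same extraction instead
# of racing to fill one directory. Names are illustrative.
import os
import shutil
import tarfile
from filelock import FileLock

def extract_tar_locked(archive_path, out_dir):
    with FileLock(archive_path + ".lock"):
        shutil.rmtree(out_dir, ignore_errors=True)
        os.makedirs(out_dir, exist_ok=True)
        if tarfile.is_tarfile(archive_path):
            with tarfile.open(archive_path) as tar:
                tar.extractall(out_dir)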
def configure_acestream(latest_version): # Configuration for LINUX if xbmc.getCondVisibility( 'system.platform.linux' ) and not xbmc.getCondVisibility('system.platform.Android'): print("Detected OS: Linux") if "arm" in os.uname()[4]: print("Linux Arm") if settings.getSetting('rpi2') == "true": ACE_KIT = os.path.join(addonpath, "resources", "binaries", "acestream_rpi.tar.gz") if tarfile.is_tarfile(ACE_KIT): path_libraries = os.path.join(pastaperfil) download_tools().extract(ACE_KIT, path_libraries) xbmc.sleep(500) # set chroot to executable binary_path = os.path.join(pastaperfil, "acestream", "chroot") st = os.stat(binary_path) import stat os.chmod(binary_path, st.st_mode | stat.S_IEXEC) if latest_version: settings.setSetting('acestream_version', value=latest_version) return elif os.uname()[4] == "x86_64": if settings.getSetting('openelecx86_64') == "true": print("OpenELEC x86_64 Acestream configuration") ACE_KIT = os.path.join(addonpath, openeelcx86_64_acestream.split("/")[-1]) download_tools().Downloader(openeelcx86_64_acestream, ACE_KIT, translate(30110), translate(30000)) if tarfile.is_tarfile(ACE_KIT): download_tools().extract(ACE_KIT, pastaperfil) xbmc.sleep(500) download_tools().remove(ACE_KIT) if latest_version: settings.setSetting('acestream_version', value=latest_version) return else: print("64 bit Linux Disto Acestream Configuration") ACE_KIT = os.path.join( addonpath, acestream_linux_x64_generic.split("/")[-1]) download_tools().Downloader(acestream_linux_x64_generic, ACE_KIT, translate(30110), translate(30000)) if tarfile.is_tarfile(ACE_KIT): download_tools().extract(ACE_KIT, pastaperfil) xbmc.sleep(500) download_tools().remove(ACE_KIT) if latest_version: settings.setSetting('acestream_version', value=latest_version) return elif os.uname()[4] == "i386" or os.uname()[4] == "i686": if settings.getSetting('openeleci386') == "true": print("32 bit Openelec Acestream Configuration") ACE_KIT = os.path.join(addonpath, openeelcxi386_acestream.split("/")[-1]) download_tools().Downloader(openeelcxi386_acestream, ACE_KIT, translate(30110), translate(30000)) if tarfile.is_tarfile(ACE_KIT): download_tools().extract(ACE_KIT, pastaperfil) xbmc.sleep(500) download_tools().remove(ACE_KIT) if latest_version: settings.setSetting('acestream_version', value=latest_version) return else: print("32 bit Linux general distro Acestream Configuration") ACE_KIT = os.path.join( addonpath, acestream_linux_i386_generic.split("/")[-1]) download_tools().Downloader(acestream_linux_i386_generic, ACE_KIT, translate(30110), translate(30000)) if tarfile.is_tarfile(ACE_KIT): download_tools().extract(ACE_KIT, pastaperfil) xbmc.sleep(500) download_tools().remove(ACE_KIT) if latest_version: settings.setSetting('acestream_version', value=latest_version) return elif xbmc.getCondVisibility('system.platform.windows'): print("Detected OS: Windows") if not os.path.exists(pastaperfil): xbmcvfs.mkdir(pastaperfil) # Ace SPSC_KIT = os.path.join(addonpath, acestream_windows.split("/")[-1]) download_tools().Downloader(acestream_windows, SPSC_KIT, translate(30110), translate(30000)) if os.path.exists(os.path.join(pastaperfil, "acestream")): shutil.rmtree(os.path.join(pastaperfil, "acestream")) if os.path.exists(os.path.join(pastaperfil, "player")): shutil.rmtree(os.path.join(pastaperfil, "player")) if tarfile.is_tarfile(SPSC_KIT): path_libraries = os.path.join(pastaperfil) download_tools().extract(SPSC_KIT, path_libraries) download_tools().remove(SPSC_KIT) if latest_version: settings.setSetting('acestream_version', 
value=latest_version) return elif xbmc.getCondVisibility('System.Platform.OSX'): print("Detected OS: Mac OSX") available = False if os.uname()[-1] == "x86_64": mac_package = osx_x64_acestream available = True elif os.uname()[-1] == "i386": mac_package = osx_i386_acestream available = True else: available = False if available: MAC_KIT = os.path.join('/Applications', mac_package.split("/")[-1]) if not xbmcvfs.exists( os.path.join('/Applications', 'Ace Stream.app')): download_tools().Downloader(mac_package, MAC_KIT, translate(30110), translate(30000)) if xbmcvfs.exists(MAC_KIT): xbmc.sleep(1000) cmd = 'unzip /Applications/AceStreamWineOSX.zip' zipa = subprocess.Popen(cmd, shell=True) cmd = 'chmod -R 755 /Applications/Ace\ Stream.app' print cmd chmod = subprocess.Popen(cmd, shell=True) try: os.remove(MAC_KIT) except: pass if latest_version: settings.setSetting('acestream_version', value=latest_version) return else: mensagemok(translate(30000), translate(30100)) return elif xbmc.getCondVisibility('System.Platform.Android'): print("Detected OS: Android") print("Starting Acestream Configuration") # acestream config for android if not os.path.exists(pastaperfil): xbmcvfs.mkdir(pastaperfil) # Hack to get xbmc app id xbmcfolder = xbmc.translatePath(addonpath).split("/") found = False if settings.getSetting('auto_appid') == 'true': i = 0 sopcast_installed = False for folder in xbmcfolder: if folder.count('.') >= 2 and folder != addon_id: found = True break else: i += 1 if found: uid = os.getuid() app_id = xbmcfolder[i] else: if settings.getSetting('custom_appid') != '': uid = os.getuid() app_id = settings.getSetting('custom_appid') found = True if found: settings.setSetting('app_id', app_id) # Acestreamconfiguration for android starts here if "arm" in os.uname()[4]: acebundle = os.path.join(pastaperfil, android_aceengine_arm.split("/")[-1]) download_tools().Downloader(android_aceengine_arm, acebundle, translate(30111), translate(30000)) else: acebundle = os.path.join(pastaperfil, android_aceengine_x86.split("/")[-1]) download_tools().Downloader(android_aceengine_x86, acebundle, translate(30111), translate(30000)) if tarfile.is_tarfile(acebundle): download_tools().extract(acebundle, pastaperfil) download_tools().remove(acebundle) orgacestreamenginefolder = os.path.join(pastaperfil, "org.acestream.engine") xbmc_data_path = os.path.join("/data", "data", app_id) if os.path.exists(xbmc_data_path) and uid == os.stat( xbmc_data_path).st_uid: android_binary_dir = os.path.join(xbmc_data_path, "files", "program.plexus") if not os.path.exists(android_binary_dir): os.makedirs(android_binary_dir) android_acestream_folder = os.path.join(android_binary_dir, "org.acestream.engine") if not os.path.exists(android_acestream_folder): os.makedirs(android_acestream_folder) else: # clean install for android - delete old folder print android_acestream_folder try: os.system("chmod -R 777 " + android_acestream_folder + "/*") os.system("rm -r '" + android_acestream_folder + "'") except: pass try: os.makedirs(android_acestream_folder) except: pass xbmc.sleep(200) # clean install in android - remove /sdcard/.ACEStream folder if it exists (to be enabled between versions if we need to remove older settings # if os.path.exists(os.path.join('/sdcard','.ACEStream')): # try: # hidden_ace = os.path.join('/sdcard','.ACEStream') # os.system("chmod -R 777 "+hidden_ace+"/*") # os.system("rm -r '"+hidden_ace+"'") # except: pass recursive_overwrite(orgacestreamenginefolder, android_acestream_folder, ignore=None) pythonbin = 
os.path.join(android_acestream_folder, "files", "python", "bin", "python") st = os.stat(pythonbin) import stat os.chmod(pythonbin, st.st_mode | stat.S_IEXEC) if os.path.exists(orgacestreamenginefolder): try: os.system("chmod -R 777 " + orgacestreamenginefolder + "/*") os.system("rm -r '" + orgacestreamenginefolder + "'") except: pass try: xbmcvfs.mkdir(os.path.join('/sdcard', 'org.acestream.engine')) except: pass opcao = xbmcgui.Dialog().yesno(translate(30000), translate(30112), translate(30113)) if not opcao: settings.setSetting('engine_app', '0') else: mensagemok(translate(30000), translate(30114), translate(30115), translate(30116)) if os.path.exists(os.path.join("sdcard", "Download")): pasta = os.path.join("sdcard", "Download") if "arm" in os.uname()[4]: acefile = os.path.join( "sdcard", "Download", acestreamengine_apk_arm.split("/")[-1]) else: acefile = os.path.join( "sdcard", "Download", acestreamengine_apk_x86.split("/")[-1]) else: dialog = xbmcgui.Dialog() pasta = dialog.browse(int(0), translate(30105), 'myprograms') if "arm" in os.uname()[4]: acefile = os.path.join( pasta, acestreamengine_apk_arm.split("/")[-1]) else: acefile = os.path.join( pasta, acestreamengine_apk_x86.split("/")[-1]) if "arm" in os.uname()[4]: download_tools().Downloader(acestreamengine_apk_arm, acefile, translate(30117), translate(30000)) else: download_tools().Downloader(acestreamengine_apk_x86, acefile, translate(30117), translate(30000)) if tarfile.is_tarfile(acefile): download_tools().extract(acefile, pasta) download_tools().remove(acefile) xbmc.sleep(2000) mensagemok(translate(30000), translate(30118), pasta, translate(30108)) mensagemok(translate(30000), translate(30119), translate(30120), translate(30121)) settings.setSetting('engine_app', '1') opcao = xbmcgui.Dialog().yesno(translate(30000), translate(30122), translate(30123)) if opcao: if os.path.exists(os.path.join("sdcard", "Download")): pasta = os.path.join("sdcard", "Download") if "arm" in os.uname()[4]: acefile = os.path.join( "sdcard", "Download", android_aceplayer_arm.split("/")[-1]) else: acefile = os.path.join( "sdcard", "Download", android_aceplayer_x86.split("/")[-1]) else: dialog = xbmcgui.Dialog() pasta = dialog.browse(int(0), translate(30105), 'myprograms') if "arm" in os.uname()[4]: acefile = os.path.join( pasta, android_aceplayer_arm.split("/")[-1]) else: acefile = os.path.join( pasta, android_aceplayer_x86.split("/")[-1]) if "arm" in os.uname()[4]: download_tools().Downloader(android_aceplayer_arm, acefile, translate(30124), translate(30000)) else: download_tools().Downloader(android_aceplayer_x86, acefile, translate(30124), translate(30000)) if tarfile.is_tarfile(acefile): download_tools().extract(acefile, pasta) download_tools().remove(acefile) xbmc.sleep(2000) mensagemok(translate(30000), translate(30125), pasta, translate(30108)) opcao = xbmcgui.Dialog().yesno(translate(30000), translate(30126)) if opcao: settings.setSetting('engine_app', '2') if latest_version: settings.setSetting('acestream_version', value=latest_version) mensagemok(translate(30000), translate(30127)) return else: mensagemok(translate(30000), translate(30109)) return
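# Added example: the os.uname() architecture dispatch that both configure
# functions above repeat. Index 4 is the "machine" field, e.g. 'x86_64',
# 'i686' or 'armv7l'. The helper name and return labels are illustrative.
import os

def machine_arch():
    machine = os.uname()[4]
    if "arm" in machine:
        return "arm"
    if machine == "x86_64":
        return "x86_64"
    if machine in ("i386", "i686"):
        return "x86"
    return machine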
def _uncompress_file(file_, delete_archive=True, verbose=1): """Uncompress files contained in a data_set. Parameters ---------- file_ : string path of file to be uncompressed. delete_archive : bool, optional Whether or not to delete the archive once it is uncompressed. Default: True verbose : int, optional verbosity level (0 means no message). Notes ----- This handles zip, tar, gzip and bzip files only. """ if verbose > 0: sys.stderr.write('Extracting data from %s...' % file_) data_dir = os.path.dirname(file_) # We first try to see if it is a zip file try: filename, ext = os.path.splitext(file_) with open(file_, "rb") as fd: header = fd.read(4) processed = False if zipfile.is_zipfile(file_): z = zipfile.ZipFile(file_) z.extractall(path=data_dir) z.close() if delete_archive: os.remove(file_) file_ = filename processed = True elif ext == '.gz' or header.startswith(b'\x1f\x8b'): import gzip if ext == '.tgz': filename = filename + '.tar' elif ext == '': # For a gzip file, we rely on the assumption that there is an extension shutil.move(file_, file_ + '.gz') file_ = file_ + '.gz' gz = gzip.open(file_) out = open(filename, 'wb') shutil.copyfileobj(gz, out, 8192) gz.close() out.close() # If the file is .tar.gz, this will be handled in the next case if delete_archive: os.remove(file_) file_ = filename processed = True if os.path.isfile(file_) and tarfile.is_tarfile(file_): with contextlib.closing(tarfile.open(file_, "r")) as tar: tar.extractall(path=data_dir) if delete_archive: os.remove(file_) processed = True if not processed: raise IOError("[Uncompress] unknown archive file format: %s" % file_) if verbose > 0: sys.stderr.write('.. done.\n') except Exception as e: if verbose > 0: print('Error uncompressing file: %s' % e) raise
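# Added example: the header sniff behind the gzip branch above. gzip streams
# begin with the two magic bytes 0x1f 0x8b, so checking them catches gzipped
# files whose extension was lost. The helper name is illustrative.
def looks_gzipped(path):
    with open(path, "rb") as fd:
        return fd.read(2) == b"\x1f\x8b"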
def import_data_dj( in_path, group=None, ignore_unknown_nodes=False, extras_mode_existing='kcl', extras_mode_new='import', comment_mode='newest', silent=False ): """Import exported AiiDA archive to the AiiDA database and repository. Specific for the Django backend. If ``in_path`` is a folder, calls extract_tree; otherwise, tries to detect the compression format (zip, tar.gz, tar.bz2, ...) and calls the correct function. :param in_path: the path to a file or folder that can be imported in AiiDA. :type in_path: str :param group: Group wherein all imported Nodes will be placed. :type group: :py:class:`~aiida.orm.groups.Group` :param extras_mode_existing: 3 letter code that will identify what to do with the extras import. The first letter acts on extras that are present in the original node and not present in the imported node. Can be either: 'k' (keep it) or 'n' (do not keep it). The second letter acts on the imported extras that are not present in the original node. Can be either: 'c' (create it) or 'n' (do not create it). The third letter defines what to do in case of a name collision. Can be either: 'l' (leave the old value), 'u' (update with a new value), 'd' (delete the extra), or 'a' (ask what to do if the content is different). :type extras_mode_existing: str :param extras_mode_new: 'import' to import extras of new nodes or 'none' to ignore them. :type extras_mode_new: str :param comment_mode: Comment import modes (when same UUIDs are found). Can be either: 'newest' (will keep the Comment with the most recent modification time (mtime)) or 'overwrite' (will overwrite existing Comments with the ones from the import file). :type comment_mode: str :param silent: suppress prints. :type silent: bool :return: New and existing Nodes and Links. :rtype: dict :raises `~aiida.tools.importexport.common.exceptions.ImportValidationError`: if parameters or the contents of `metadata.json` or `data.json` can not be validated. :raises `~aiida.tools.importexport.common.exceptions.CorruptArchive`: if the provided archive at ``in_path`` is corrupted. :raises `~aiida.tools.importexport.common.exceptions.IncompatibleArchiveVersionError`: if the provided archive's export version is not equal to the export version of AiiDA at the moment of import. :raises `~aiida.tools.importexport.common.exceptions.ArchiveImportError`: if there are any internal errors when importing. :raises `~aiida.tools.importexport.common.exceptions.ImportUniquenessError`: if a new unique entity can not be created. 
""" from django.db import transaction # pylint: disable=import-error,no-name-in-module from aiida.backends.djsite.db import models # This is the export version expected by this function expected_export_version = StrictVersion(EXPORT_VERSION) # The returned dictionary with new and existing nodes and links ret_dict = {} # Initial check(s) if group: if not isinstance(group, Group): raise exceptions.ImportValidationError('group must be a Group entity') elif not group.is_stored: group.store() ################ # EXTRACT DATA # ################ # The sandbox has to remain open until the end with SandboxFolder() as folder: if os.path.isdir(in_path): extract_tree(in_path, folder) else: if tarfile.is_tarfile(in_path): extract_tar(in_path, folder, silent=silent, nodes_export_subfolder=NODES_EXPORT_SUBFOLDER) elif zipfile.is_zipfile(in_path): try: extract_zip(in_path, folder, silent=silent, nodes_export_subfolder=NODES_EXPORT_SUBFOLDER) except ValueError as exc: print('The following problem occured while processing the provided file: {}'.format(exc)) return else: raise exceptions.ImportValidationError( 'Unable to detect the input file format, it is neither a ' '(possibly compressed) tar file, nor a zip file.' ) if not folder.get_content_list(): raise exceptions.CorruptArchive('The provided file/folder ({}) is empty'.format(in_path)) try: with io.open(folder.get_abs_path('metadata.json'), 'r', encoding='utf8') as fhandle: metadata = json.load(fhandle) with io.open(folder.get_abs_path('data.json'), 'r', encoding='utf8') as fhandle: data = json.load(fhandle) except IOError as error: raise exceptions.CorruptArchive( 'Unable to find the file {} in the import file or folder'.format(error.filename) ) ###################### # PRELIMINARY CHECKS # ###################### export_version = StrictVersion(str(metadata['export_version'])) if export_version != expected_export_version: msg = 'Export file version is {}, can import only version {}'\ .format(metadata['export_version'], expected_export_version) if export_version < expected_export_version: msg += "\nUse 'verdi export migrate' to update this export file." else: msg += '\nUpdate your AiiDA version in order to import this file.' raise exceptions.IncompatibleArchiveVersionError(msg) ########################################################################## # CREATE UUID REVERSE TABLES AND CHECK IF I HAVE ALL NODES FOR THE LINKS # ########################################################################## linked_nodes = set(chain.from_iterable((l['input'], l['output']) for l in data['links_uuid'])) group_nodes = set(chain.from_iterable(six.itervalues(data['groups_uuid']))) if NODE_ENTITY_NAME in data['export_data']: import_nodes_uuid = set(v['uuid'] for v in data['export_data'][NODE_ENTITY_NAME].values()) else: import_nodes_uuid = set() # the combined set of linked_nodes and group_nodes was obtained from looking at all the links # the set of import_nodes_uuid was received from the stuff actually referred to in export_data unknown_nodes = linked_nodes.union(group_nodes) - import_nodes_uuid if unknown_nodes and not ignore_unknown_nodes: raise exceptions.DanglingLinkError( 'The import file refers to {} nodes with unknown UUID, therefore it cannot be imported. Either first ' 'import the unknown nodes, or export also the parents when exporting. 
The unknown UUIDs are:\n' ''.format(len(unknown_nodes)) + '\n'.join('* {}'.format(uuid) for uuid in unknown_nodes) ) ################################### # DOUBLE-CHECK MODEL DEPENDENCIES # ################################### # The entity import order. It is defined by the database model relationships. model_order = ( USER_ENTITY_NAME, COMPUTER_ENTITY_NAME, NODE_ENTITY_NAME, GROUP_ENTITY_NAME, LOG_ENTITY_NAME, COMMENT_ENTITY_NAME ) for import_field_name in metadata['all_fields_info']: if import_field_name not in model_order: raise exceptions.ImportValidationError( "You are trying to import an unknown model '{}'!".format(import_field_name) ) for idx, model_name in enumerate(model_order): dependencies = [] for field in metadata['all_fields_info'][model_name].values(): try: dependencies.append(field['requires']) except KeyError: # (No ForeignKey) pass for dependency in dependencies: if dependency not in model_order[:idx]: raise exceptions.ArchiveImportError( 'Model {} requires {} but would be loaded first; stopping...'.format(model_name, dependency) ) ################################################### # CREATE IMPORT DATA DIRECT UNIQUE_FIELD MAPPINGS # ################################################### import_unique_ids_mappings = {} for model_name, import_data in data['export_data'].items(): if model_name in metadata['unique_identifiers']: # I have to reconvert the pk to integer import_unique_ids_mappings[model_name] = { int(k): v[metadata['unique_identifiers'][model_name]] for k, v in import_data.items() } ############### # IMPORT DATA # ############### # DO ALL WITH A TRANSACTION with transaction.atomic(): foreign_ids_reverse_mappings = {} new_entries = {} existing_entries = {} # I first generate the list of data for model_name in model_order: cls_signature = entity_names_to_signatures[model_name] model = get_object_from_string(cls_signature) fields_info = metadata['all_fields_info'].get(model_name, {}) unique_identifier = metadata['unique_identifiers'].get(model_name, None) new_entries[model_name] = {} existing_entries[model_name] = {} foreign_ids_reverse_mappings[model_name] = {} # Not necessarily all models are exported if model_name in data['export_data']: # skip nodes that are already present in the DB if unique_identifier is not None: import_unique_ids = set(v[unique_identifier] for v in data['export_data'][model_name].values()) relevant_db_entries_result = model.objects.filter( **{'{}__in'.format(unique_identifier): import_unique_ids} ) # Note: uuids need to be converted to strings relevant_db_entries = { str(getattr(n, unique_identifier)): n for n in relevant_db_entries_result } foreign_ids_reverse_mappings[model_name] = {k: v.pk for k, v in relevant_db_entries.items()} for key, value in data['export_data'][model_name].items(): if value[unique_identifier] in relevant_db_entries.keys(): # Already in DB existing_entries[model_name][key] = value else: # To be added new_entries[model_name][key] = value else: new_entries[model_name] = data['export_data'][model_name].copy() # Show Comment mode if not silent if not silent: print('Comment mode: {}'.format(comment_mode)) # I import data from the given model for model_name in model_order: cls_signature = entity_names_to_signatures[model_name] model = get_object_from_string(cls_signature) fields_info = metadata['all_fields_info'].get(model_name, {}) unique_identifier = metadata['unique_identifiers'].get(model_name, None) # EXISTING ENTRIES for import_entry_pk, entry_data in existing_entries[model_name].items(): unique_id = 
entry_data[unique_identifier] existing_entry_id = foreign_ids_reverse_mappings[model_name][unique_id] import_data = dict( deserialize_field( k, v, fields_info=fields_info, import_unique_ids_mappings=import_unique_ids_mappings, foreign_ids_reverse_mappings=foreign_ids_reverse_mappings ) for k, v in entry_data.items() ) # TODO COMPARE, AND COMPARE ATTRIBUTES if model is models.DbComment: new_entry_uuid = merge_comment(import_data, comment_mode) if new_entry_uuid is not None: entry_data[unique_identifier] = new_entry_uuid new_entries[model_name][import_entry_pk] = entry_data if model_name not in ret_dict: ret_dict[model_name] = {'new': [], 'existing': []} ret_dict[model_name]['existing'].append((import_entry_pk, existing_entry_id)) if not silent: print('existing %s: %s (%s->%s)' % (model_name, unique_id, import_entry_pk, existing_entry_id)) # print(" `-> WARNING: NO DUPLICITY CHECK DONE!") # CHECK ALSO FILES! # Store all objects for this model in a list, and store them all in once at the end. objects_to_create = [] # This is needed later to associate the import entry with the new pk import_new_entry_pks = {} imported_comp_names = set() # NEW ENTRIES for import_entry_pk, entry_data in new_entries[model_name].items(): unique_id = entry_data[unique_identifier] import_data = dict( deserialize_field( k, v, fields_info=fields_info, import_unique_ids_mappings=import_unique_ids_mappings, foreign_ids_reverse_mappings=foreign_ids_reverse_mappings ) for k, v in entry_data.items() ) if model is models.DbGroup: # Check if there is already a group with the same name dupl_counter = 0 orig_label = import_data['label'] while model.objects.filter(label=import_data['label']): import_data['label'] = orig_label + DUPL_SUFFIX.format(dupl_counter) dupl_counter += 1 if dupl_counter == 100: raise exceptions.ImportUniquenessError( 'A group of that label ( {} ) already exists and I could not create a new one' ''.format(orig_label) ) elif model is models.DbComputer: # Check if there is already a computer with the same name in the database dupl = ( model.objects.filter(name=import_data['name']) or import_data['name'] in imported_comp_names ) orig_name = import_data['name'] dupl_counter = 0 while dupl: # Rename the new computer import_data['name'] = (orig_name + DUPL_SUFFIX.format(dupl_counter)) dupl = ( model.objects.filter(name=import_data['name']) or import_data['name'] in imported_comp_names ) dupl_counter += 1 if dupl_counter == 100: raise exceptions.ImportUniquenessError( 'A computer of that name ( {} ) already exists and I could not create a new one' ''.format(orig_name) ) imported_comp_names.add(import_data['name']) objects_to_create.append(model(**import_data)) import_new_entry_pks[unique_id] = import_entry_pk if model_name == NODE_ENTITY_NAME: if not silent: print('STORING NEW NODE REPOSITORY FILES...') # NEW NODES for object_ in objects_to_create: import_entry_uuid = object_.uuid import_entry_pk = import_new_entry_pks[import_entry_uuid] # Before storing entries in the DB, I store the files (if these are nodes). # Note: only for new entries! 
subfolder = folder.get_subfolder( os.path.join(NODES_EXPORT_SUBFOLDER, export_shard_uuid(import_entry_uuid)) ) if not subfolder.exists(): raise exceptions.CorruptArchive( 'Unable to find the repository folder for Node with UUID={} in the exported ' 'file'.format(import_entry_uuid) ) destdir = RepositoryFolder(section=Repository._section_name, uuid=import_entry_uuid) # Replace the folder, possibly destroying existing previous folders, and move the files # (faster if we are on the same filesystem, and in any case the source is a SandboxFolder) destdir.replace_with_folder(subfolder.abspath, move=True, overwrite=True) # For DbNodes, we also have to store their attributes if not silent: print('STORING NEW NODE ATTRIBUTES...') # Get attributes from import file try: object_.attributes = data['node_attributes'][str(import_entry_pk)] except KeyError: raise exceptions.CorruptArchive( 'Unable to find attribute info for Node with UUID={}'.format(import_entry_uuid) ) # For DbNodes, we also have to store their extras if extras_mode_new == 'import': if not silent: print('STORING NEW NODE EXTRAS...') # Get extras from import file try: extras = data['node_extras'][str(import_entry_pk)] except KeyError: raise exceptions.CorruptArchive( 'Unable to find extra info for Node with UUID={}'.format(import_entry_uuid) ) # TODO: remove when aiida extras will be moved somewhere else # from here extras = {key: value for key, value in extras.items() if not key.startswith('_aiida_')} if object_.node_type.endswith('code.Code.'): extras = {key: value for key, value in extras.items() if not key == 'hidden'} # till here object_.extras = extras elif extras_mode_new == 'none': if not silent: print('SKIPPING NEW NODE EXTRAS...') else: raise exceptions.ImportValidationError( "Unknown extras_mode_new value: {}, should be either 'import' or 'none'" ''.format(extras_mode_new) ) # EXISTING NODES (Extras) # For the existing nodes that are also in the imported list we also update their extras if necessary if not silent: print('UPDATING EXISTING NODE EXTRAS (mode: {})'.format(extras_mode_existing)) import_existing_entry_pks = { entry_data[unique_identifier]: import_entry_pk for import_entry_pk, entry_data in existing_entries[model_name].items() } for node in models.DbNode.objects.filter(uuid__in=import_existing_entry_pks).all(): # pylint: disable=no-member import_entry_uuid = str(node.uuid) import_entry_pk = import_existing_entry_pks[import_entry_uuid] # Get extras from import file try: extras = data['node_extras'][str(import_entry_pk)] except KeyError: raise exceptions.CorruptArchive( 'Unable to find extra info for Node with UUID={}'.format(import_entry_uuid) ) # TODO: remove when aiida extras will be moved somewhere else # from here extras = {key: value for key, value in extras.items() if not key.startswith('_aiida_')} if node.node_type.endswith('code.Code.'): extras = {key: value for key, value in extras.items() if not key == 'hidden'} # till here node.extras = merge_extras(node.extras, extras, extras_mode_existing) # Already saving existing node here to update its extras node.save() # If there is an mtime in the field, disable the automatic update # to keep the mtime that we have set here if 'mtime' in [field.name for field in model._meta.local_fields]: with models.suppress_auto_now([(model, ['mtime'])]): # Store them all at once; however, the PKs are not set in this way... 
model.objects.bulk_create(objects_to_create) else: model.objects.bulk_create(objects_to_create) # Get back the just-saved entries just_saved_queryset = model.objects.filter( **{ '{}__in'.format(unique_identifier): import_new_entry_pks.keys() } ).values_list(unique_identifier, 'pk') # note: convert uuids from type UUID to strings just_saved = {str(key): value for key, value in just_saved_queryset} # Now I have the PKs, print the info # Moreover, add newly created Nodes to foreign_ids_reverse_mappings for unique_id, new_pk in just_saved.items(): import_entry_pk = import_new_entry_pks[unique_id] foreign_ids_reverse_mappings[model_name][unique_id] = new_pk if model_name not in ret_dict: ret_dict[model_name] = {'new': [], 'existing': []} ret_dict[model_name]['new'].append((import_entry_pk, new_pk)) if not silent: print('NEW %s: %s (%s->%s)' % (model_name, unique_id, import_entry_pk, new_pk)) if not silent: print('STORING NODE LINKS...') import_links = data['links_uuid'] links_to_store = [] # Needed, since QueryBuilder does not yet work for recently saved Nodes existing_links_raw = models.DbLink.objects.all().values_list('input', 'output', 'label', 'type') existing_links = {(l[0], l[1], l[2], l[3]) for l in existing_links_raw} existing_outgoing_unique = {(l[0], l[3]) for l in existing_links_raw} existing_outgoing_unique_pair = {(l[0], l[2], l[3]) for l in existing_links_raw} existing_incoming_unique = {(l[1], l[3]) for l in existing_links_raw} existing_incoming_unique_pair = {(l[1], l[2], l[3]) for l in existing_links_raw} calculation_node_types = 'process.calculation.' workflow_node_types = 'process.workflow.' data_node_types = 'data.' link_mapping = { LinkType.CALL_CALC: (workflow_node_types, calculation_node_types, 'unique_triple', 'unique'), LinkType.CALL_WORK: (workflow_node_types, workflow_node_types, 'unique_triple', 'unique'), LinkType.CREATE: (calculation_node_types, data_node_types, 'unique_pair', 'unique'), LinkType.INPUT_CALC: (data_node_types, calculation_node_types, 'unique_triple', 'unique_pair'), LinkType.INPUT_WORK: (data_node_types, workflow_node_types, 'unique_triple', 'unique_pair'), LinkType.RETURN: (workflow_node_types, data_node_types, 'unique_pair', 'unique_triple'), } for link in import_links: # Check for dangling Links within the, supposed, self-consistent archive try: in_id = foreign_ids_reverse_mappings[NODE_ENTITY_NAME][link['input']] out_id = foreign_ids_reverse_mappings[NODE_ENTITY_NAME][link['output']] except KeyError: if ignore_unknown_nodes: continue else: raise exceptions.ImportValidationError( 'Trying to create a link with one or both unknown nodes, stopping (in_uuid={}, ' 'out_uuid={}, label={}, type={})'.format( link['input'], link['output'], link['label'], link['type'] ) ) # Check if link already exists, skip if it does # This is equivalent to an existing triple link (i.e. unique_triple from below) if (in_id, out_id, link['label'], link['type']) in existing_links: continue # Since backend specific Links (DbLink) are not validated upon creation, we will now validate them. 
try: validate_link_label(link['label']) except ValueError as why: raise exceptions.ImportValidationError('Error during Link label validation: {}'.format(why)) source = models.DbNode.objects.get(id=in_id) target = models.DbNode.objects.get(id=out_id) if source.uuid == target.uuid: raise exceptions.ImportValidationError('Cannot add a link to oneself') link_type = LinkType(link['type']) type_source, type_target, outdegree, indegree = link_mapping[link_type] # Check if the source Node is a valid type if not source.node_type.startswith(type_source): raise exceptions.ImportValidationError( 'Cannot add a {} link from {} to {}'.format(link_type, source.node_type, target.node_type) ) # Check if the target Node is a valid type if not target.node_type.startswith(type_target): raise exceptions.ImportValidationError( 'Cannot add a {} link from {} to {}'.format(link_type, source.node_type, target.node_type) ) # If the outdegree is `unique`, there may be no other outgoing link of that type, # i.e., the source Node must not already have an outgoing link of the current LinkType. if outdegree == 'unique' and (in_id, link['type']) in existing_outgoing_unique: raise exceptions.ImportValidationError( 'Node<{}> already has an outgoing {} link'.format(source.uuid, link_type) ) # If the outdegree is `unique_pair`, the labels of outgoing links of this type must be unique, # i.e., the source Node must not already have an outgoing link of the current LinkType # that carries the current Link label. elif outdegree == 'unique_pair' and \ (in_id, link['label'], link['type']) in existing_outgoing_unique_pair: raise exceptions.ImportValidationError( 'Node<{}> already has an outgoing {} link with label "{}"'.format( source.uuid, link_type, link['label'] ) ) # If the indegree is `unique`, there may be no other incoming link of that type, # i.e., the target Node must not already have an incoming link of the current LinkType. if indegree == 'unique' and (out_id, link['type']) in existing_incoming_unique: raise exceptions.ImportValidationError( 'Node<{}> already has an incoming {} link'.format(target.uuid, link_type) ) # If the indegree is `unique_pair`, the labels of incoming links of this type must be unique, # i.e., the target Node must not already have an incoming link of the current LinkType # that carries the current Link label.
elif indegree == 'unique_pair' and \ (out_id, link['label'], link['type']) in existing_incoming_unique_pair: raise exceptions.ImportValidationError( 'Node<{}> already has an incoming {} link with label "{}"'.format( target.uuid, link_type, link['label'] ) ) # New link links_to_store.append( models.DbLink(input_id=in_id, output_id=out_id, label=link['label'], type=link['type']) ) if 'Link' not in ret_dict: ret_dict['Link'] = {'new': []} ret_dict['Link']['new'].append((in_id, out_id)) # Add the new Link to the sets of existing Links ('input PK', 'output PK', 'label', 'type') existing_links.add((in_id, out_id, link['label'], link['type'])) existing_outgoing_unique.add((in_id, link['type'])) existing_outgoing_unique_pair.add((in_id, link['label'], link['type'])) existing_incoming_unique.add((out_id, link['type'])) existing_incoming_unique_pair.add((out_id, link['label'], link['type'])) # Store new links if links_to_store: if not silent: print(' ({} new links...)'.format(len(links_to_store))) models.DbLink.objects.bulk_create(links_to_store) else: if not silent: print(' (0 new links...)') if not silent: print('STORING GROUP ELEMENTS...') import_groups = data['groups_uuid'] for groupuuid, groupnodes in import_groups.items(): # TODO: cache these to avoid too many queries group_ = models.DbGroup.objects.get(uuid=groupuuid) nodes_to_store = [foreign_ids_reverse_mappings[NODE_ENTITY_NAME][node_uuid] for node_uuid in groupnodes] if nodes_to_store: group_.dbnodes.add(*nodes_to_store) ###################################################### # Put everything in a specific group ###################################################### existing = existing_entries.get(NODE_ENTITY_NAME, {}) existing_pk = [foreign_ids_reverse_mappings[NODE_ENTITY_NAME][v['uuid']] for v in six.itervalues(existing)] new = new_entries.get(NODE_ENTITY_NAME, {}) new_pk = [foreign_ids_reverse_mappings[NODE_ENTITY_NAME][v['uuid']] for v in six.itervalues(new)] pks_for_group = existing_pk + new_pk # So that we do not create empty groups if pks_for_group: # Import everything into the user-specified group; if no group was given, create a new one if not group: # Get a unique name for the import group, based on the current (local) time basename = timezone.localtime(timezone.now()).strftime('%Y%m%d-%H%M%S') counter = 0 group_label = basename while Group.objects.find(filters={'label': group_label}): counter += 1 group_label = '{}_{}'.format(basename, counter) if counter == 100: raise exceptions.ImportUniquenessError( "Overflow of import groups (more than 100 import groups exist with basename '{}')" ''.format(basename) ) group = Group(label=group_label, type_string=IMPORTGROUP_TYPE).store() # Add all the nodes to the new group # TODO: decide if we want to return the group label nodes = [entry[0] for entry in QueryBuilder().append(Node, filters={'id': {'in': pks_for_group}}).all()] group.add_nodes(nodes) if not silent: print("IMPORTED NODES ARE GROUPED IN THE IMPORT GROUP LABELED '{}'".format(group.label)) else: if not silent: print('NO NODES TO IMPORT, SO NO GROUP WAS CREATED (IF IT DID NOT ALREADY EXIST)') if not silent: print('*** WARNING: MISSING EXISTING UUID CHECKS!!') print('*** WARNING: TODO: UPDATE IMPORT_DATA WITH DEFAULT VALUES! (e.g. calc status, user pwd, ...)') print('DONE.') return ret_dict
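The degree checks in the import loop above reduce to constant-time set membership tests. Below is a minimal, self-contained sketch of that bookkeeping, offered as an illustration rather than the AiiDA implementation; the rule table and the sample links are made-up values for the example.

# Hedged sketch of the outdegree/indegree uniqueness checks performed during link
# import above. The rule names ('unique', 'unique_pair') follow the code; the rule
# table and the demo data are illustrative assumptions only.
def check_degree_constraints(links, rules):
    """Yield (link, error) pairs; error is None when the link is admissible."""
    outgoing_unique = set()        # (source_id, link_type)
    outgoing_unique_pair = set()   # (source_id, label, link_type)
    incoming_unique = set()        # (target_id, link_type)
    incoming_unique_pair = set()   # (target_id, label, link_type)
    for source_id, target_id, label, link_type in links:
        outdegree, indegree = rules[link_type]
        error = None
        if outdegree == 'unique' and (source_id, link_type) in outgoing_unique:
            error = 'source already has an outgoing %s link' % link_type
        elif outdegree == 'unique_pair' and (source_id, label, link_type) in outgoing_unique_pair:
            error = 'source already has an outgoing %s link labeled %r' % (link_type, label)
        elif indegree == 'unique' and (target_id, link_type) in incoming_unique:
            error = 'target already has an incoming %s link' % link_type
        elif indegree == 'unique_pair' and (target_id, label, link_type) in incoming_unique_pair:
            error = 'target already has an incoming %s link labeled %r' % (link_type, label)
        if error is None:
            # Admit the link and record it in all four sets, just as the import loop
            # above extends its existing_* sets after appending a new DbLink.
            outgoing_unique.add((source_id, link_type))
            outgoing_unique_pair.add((source_id, label, link_type))
            incoming_unique.add((target_id, link_type))
            incoming_unique_pair.add((target_id, label, link_type))
        yield (source_id, target_id, label, link_type), error

rules = {'create': ('unique_pair', 'unique')}  # illustrative rule table
demo = [(1, 2, 'out', 'create'), (1, 3, 'out', 'create')]
for link, error in check_degree_constraints(demo, rules):
    print(link, '->', error or 'ok')  # the second link violates the unique_pair outdegree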
def extract_file(filename, extract, destination, silent=False): try: if tarfile.is_tarfile(filename): if not silent: extract_dlg = xbmcgui.DialogProgress() extract_dlg.create('OpenELEC ', _(32186).encode('utf-8'), ' ', ' ') extract_dlg.update(0) compressed = tarfile.open(filename) if not silent: xbmc.executebuiltin('ActivateWindow(busydialog)') names = compressed.getnames() if not silent: xbmc.executebuiltin('Dialog.Close(busydialog)') for name in names: for search in extract: if search in name: fileinfo = compressed.getmember(name) response = compressed.extractfile(fileinfo) local_file = open(destination + name.rsplit('/', 1)[1], 'wb') total_size = fileinfo.size minutes = 0 seconds = 0 rest = 1 speed = 1 start = time.time() size = 1 part_size = 1 last_percent = 0 while True: part = response.read(32768) part_size += len(part) if not silent: if extract_dlg.iscanceled(): local_file.close() response.close() return None if not part or xbmc.abortRequested: break if time.time() > start + 2: speed = int((part_size - size) / (time.time() - start) / 1024) start = time.time() size = part_size rest = total_size - part_size minutes = rest / 1024 / speed / 60 seconds = rest / 1024 / speed - minutes * 60 percent = int(part_size * 100.0 / total_size) if not silent: extract_dlg.update( percent, _(32184) + ': %s' % name.rsplit('/', 1)[1], _(32185) + ': %d KB/s' % speed, _(32183) + ': %d m %d s' % (minutes, seconds)) if extract_dlg.iscanceled(): local_file.close() response.close() return None else: if percent > last_percent + 5: dbg_log( 'oe::extract_file(' + destination + name.rsplit('/', 1)[1] + ')', '%d percent with %d KB/s' % (percent, speed)) last_percent = percent local_file.write(part) local_file.close() response.close() return 1 except Exception, e: dbg_log('oe::extract_file', 'ERROR: (' + repr(e) + ')')
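The progress arithmetic in extract_file (chunked reads, a speed sample taken roughly every two seconds, and a remaining-time estimate derived from the unread byte count) is portable beyond the Kodi/OpenELEC setting. Here is a minimal Python 3 sketch of the same loop; the archive path, member name, and 32 KiB chunk size are illustrative assumptions, not values from the snippet above.

# Hedged sketch: stream one member out of a tarball while reporting speed and ETA,
# mirroring the chunked-read bookkeeping in extract_file above.
import tarfile
import time

def extract_member_with_progress(archive_path, member_name, dest_path):
    with tarfile.open(archive_path) as tar:
        info = tar.getmember(member_name)
        src = tar.extractfile(info)
        done = 0
        window_start, window_bytes = time.time(), 0
        speed = 0.0  # bytes per second over the last sampling window
        with open(dest_path, 'wb') as dst:
            while True:
                chunk = src.read(32768)
                if not chunk:
                    break
                dst.write(chunk)
                done += len(chunk)
                window_bytes += len(chunk)
                now = time.time()
                if now - window_start > 2:  # resample the speed every ~2 seconds
                    speed = window_bytes / (now - window_start)
                    window_start, window_bytes = now, 0
                    eta = (info.size - done) / speed if speed else float('inf')
                    print('%3d%% at %.0f KB/s, ~%.0f s left'
                          % (done * 100 // info.size, speed / 1024, eta))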
def tar_files(self, aTarFileBytes, formatt): "Unpack a tar file, patch eligible members, and return the rebuilt tar file" print "[*] TarFile size:", len(aTarFileBytes) / 1024, 'KB' if len(aTarFileBytes) > int(self.userConfig['TAR']['maxSize']): print "[!] TarFile over allowed size" mitmf_logger.info("TarFile maxSize exceeded: {}".format( len(aTarFileBytes))) self.patched.put(aTarFileBytes) return with tempfile.NamedTemporaryFile() as tarFileStorage: tarFileStorage.write(aTarFileBytes) tarFileStorage.flush() if not tarfile.is_tarfile(tarFileStorage.name): print '[!] Not a tar file' self.patched.put(aTarFileBytes) return compressionMode = ':' if formatt == 'gz': compressionMode = ':gz' if formatt == 'bz': compressionMode = ':bz2' tarFile = None try: tarFileStorage.seek(0) tarFile = tarfile.open(fileobj=tarFileStorage, mode='r' + compressionMode) except tarfile.ReadError: pass if tarFile is None: print '[!] Not a tar file' self.patched.put(aTarFileBytes) return print '[*] Tar file contents and info:' print '[*] Compression:', formatt members = tarFile.getmembers() for info in members: print "\t", info.name, info.mtime, info.size newTarFileStorage = tempfile.NamedTemporaryFile() newTarFile = tarfile.open(mode='w' + compressionMode, fileobj=newTarFileStorage) patchCount = 0 wasPatched = False for info in members: print "[*] >>> Next file in tarfile:", info.name if not info.isfile(): print info.name, 'is not a file' newTarFile.addfile(info, tarFile.extractfile(info)) continue if info.size >= long(self.FileSizeMax): print info.name, 'is too big' newTarFile.addfile(info, tarFile.extractfile(info)) continue # Check against blacklist keywords keywordCheck = False if isinstance(self.tarblacklist, str): if self.tarblacklist.lower() in info.name.lower(): keywordCheck = True else: for keyword in self.tarblacklist: if keyword.lower() in info.name.lower(): keywordCheck = True break if keywordCheck: print "[!] Tar blacklist enforced!" mitmf_logger.info('Tar blacklist enforced on {}'.format( info.name)) continue # Try to patch extractedFile = tarFile.extractfile(info) if patchCount >= int(self.userConfig['TAR']['patchCount']): newTarFile.addfile(info, extractedFile) else: # create the file on disk temporarily for fileGrinder to run on it with tempfile.NamedTemporaryFile() as tmp: shutil.copyfileobj(extractedFile, tmp) tmp.flush() patchResult = self.binaryGrinder(tmp.name) if patchResult: patchCount += 1 file2 = "backdoored/" + os.path.basename(tmp.name) print "[*] Patching complete, adding to tar file." info.size = os.stat(file2).st_size with open(file2, 'rb') as f: newTarFile.addfile(info, f) mitmf_logger.info( "{} in tar patched, adding to tarfile".format( info.name)) os.remove(file2) wasPatched = True else: print "[!] Patching failed" with open(tmp.name, 'rb') as f: newTarFile.addfile(info, f) mitmf_logger.info( "{} patching failed. Keeping original file in tar." .format(info.name)) if patchCount == int(self.userConfig['TAR']['patchCount']): mitmf_logger.info("Met Tar config patchCount limit.") # finalize the writing of the tar file first newTarFile.close() # then read the new tar file into memory newTarFileStorage.seek(0) ret = newTarFileStorage.read() newTarFileStorage.close() # it's automatically deleted if not wasPatched: # If nothing was changed, return the original print "[*] No files were patched, forwarding original file" self.patched.put(aTarFileBytes) return else: self.patched.put(ret) return
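Stripped of the patching and logging, tar_files is an archive round trip: read every member from one tarball, optionally substitute its payload, and write a fresh tarball. The sketch below isolates that core pattern using in-memory buffers; the transform callback and the gzip default are assumptions for the example, not part of the code above.

# Hedged sketch of the in-memory tar rebuild pattern used by tar_files: read
# members from one archive, optionally substitute their payload, and emit a new
# archive. transform() is a stand-in for the real patching step.
import io
import tarfile

def rebuild_tar(tar_bytes, transform, mode='gz'):
    out_buf = io.BytesIO()
    with tarfile.open(fileobj=io.BytesIO(tar_bytes), mode='r:' + mode) as src, \
         tarfile.open(fileobj=out_buf, mode='w:' + mode) as dst:
        for info in src.getmembers():
            payload = src.extractfile(info) if info.isfile() else None
            if payload is not None:
                new_data = transform(info.name, payload.read())
                info.size = len(new_data)          # header size must match the new payload
                dst.addfile(info, io.BytesIO(new_data))
            else:
                dst.addfile(info)                  # directories, links, devices, etc.
    return out_buf.getvalue()

Setting info.size before addfile is the easy-to-miss step here: tarfile trusts the TarInfo header, so a stale size silently corrupts the rebuilt archive, which is why tar_files above also resets info.size after patching.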
def main(): # Forced options from multiple sources are gathered into a shared list # so that the override order remains the same as on the command line. force_options_yaml = [] def add_force_option_yaml(option, opt, value, parser): # XXX: check that YAML parses force_options_yaml.append(value) def add_force_option_file(option, opt, value, parser): # XXX: check that YAML parses with open(value, 'rb') as f: force_options_yaml.append(f.read()) def add_force_option_define(option, opt, value, parser): tmp = value.split('=') if len(tmp) == 1: doc = {tmp[0]: True} elif len(tmp) == 2: doc = {tmp[0]: tmp[1]} else: raise Exception('invalid option value: %r' % value) force_options_yaml.append(yaml.safe_dump(doc)) def add_force_option_undefine(option, opt, value, parser): tmp = value.split('=') if len(tmp) == 1: doc = {tmp[0]: False} else: raise Exception('invalid option value: %r' % value) force_options_yaml.append(yaml.safe_dump(doc)) fixup_header_lines = [] def add_fixup_header_line(option, opt, value, parser): fixup_header_lines.append(value) def add_fixup_header_file(option, opt, value, parser): with open(value, 'rb') as f: for line in f: if line[-1] == '\n': line = line[:-1] fixup_header_lines.append(line) commands = [ 'autodetect-header', 'barebones-header', 'feature-documentation', 'config-documentation' ] parser = optparse.OptionParser( usage='Usage: %prog [options] COMMAND', description= 'Generate a duk_config.h or config option documentation based on config metadata.', epilog='COMMAND can be one of: ' + ', '.join(commands) + '.') parser.add_option('--metadata', dest='metadata', default=None, help='metadata directory or metadata tar.gz file') parser.add_option( '--output', dest='output', default=None, help='output filename for C header or RST documentation file') parser.add_option('--platform', dest='platform', default=None, help='platform (for "barebones-header" command)') parser.add_option('--compiler', dest='compiler', default=None, help='compiler (for "barebones-header" command)') parser.add_option('--architecture', dest='architecture', default=None, help='architecture (for "barebones-header" command)') parser.add_option( '--dll', dest='dll', action='store_true', default=False, help= 'dll build of Duktape, affects symbol visibility macros especially on Windows' ) # FIXME: unimplemented parser.add_option( '--emit-legacy-feature-check', dest='emit_legacy_feature_check', action='store_true', default=False, help= 'emit preprocessor checks to reject legacy feature options (DUK_OPT_xxx)' ) parser.add_option( '--emit-config-sanity-check', dest='emit_config_sanity_check', action='store_true', default=False, help= 'emit preprocessor checks for config option consistency (DUK_USE_xxx)') parser.add_option( '--omit-removed-config-options', dest='omit_removed_config_options', action='store_true', default=False, help='omit removed config options from generated headers') parser.add_option( '--omit-deprecated-config-options', dest='omit_deprecated_config_options', action='store_true', default=False, help='omit deprecated config options from generated headers') parser.add_option('--omit-unused-config-options', dest='omit_unused_config_options', action='store_true', default=False, help='omit unused config options from generated headers') parser.add_option( '--define', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_define, default=force_options_yaml, help= 'force #define option using a C-compiler-like syntax, e.g.
"--define DUK_USE_DEEP_C_STACK" or "--define DUK_USE_TRACEBACK_DEPTH=10"' ) parser.add_option( '-D', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_define, default=force_options_yaml, help= 'synonym for --define, e.g. "-DDUK_USE_DEEP_C_STACK" or "-DDUK_USE_TRACEBACK_DEPTH=10"' ) parser.add_option( '--undefine', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_undefine, default=force_options_yaml, help= 'force #undef option using a C compiler like syntax, e.g. "--undefine DUK_USE_DEEP_C_STACK"' ) parser.add_option( '-U', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_undefine, default=force_options_yaml, help='synonym for --undefine, e.g. "-UDUK_USE_DEEP_C_STACK"') parser.add_option( '--option-yaml', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_yaml, default=force_options_yaml, help= 'force option(s) using inline YAML (e.g. --option-yaml "DUK_USE_DEEP_C_STACK: true")' ) parser.add_option('--option-file', type='string', dest='force_options_yaml', action='callback', callback=add_force_option_file, default=force_options_yaml, help='YAML file(s) providing config option overrides') parser.add_option( '--fixup-file', type='string', dest='fixup_header_lines', action='callback', callback=add_fixup_header_file, default=fixup_header_lines, help= 'C header snippet file(s) to be appended to generated header, useful for manual option fixups' ) parser.add_option( '--fixup-line', type='string', dest='fixup_header_lines', action='callback', callback=add_fixup_header_line, default=fixup_header_lines, help= 'C header fixup line to be appended to generated header (e.g. --fixup-line "#define DUK_USE_FASTINT")' ) parser.add_option( '--sanity-warning', dest='sanity_strict', action='store_false', default=True, help='emit a warning instead of #error for option sanity check issues') parser.add_option('--use-cpp-warning', dest='use_cpp_warning', action='store_true', default=False, help='emit a (non-portable) #warning when appropriate') (opts, args) = parser.parse_args() meta_dir = opts.metadata if opts.metadata is None: if os.path.isfile(os.path.join('.', 'genconfig_metadata.tar.gz')): opts.metadata = 'genconfig_metadata.tar.gz' elif os.path.isdir(os.path.join('.', 'config-options')): opts.metadata = '.' if opts.metadata is not None and os.path.isdir(opts.metadata): meta_dir = opts.metadata print 'Using metadata directory: %r' % meta_dir elif opts.metadata is not None and os.path.isfile( opts.metadata) and tarfile.is_tarfile(opts.metadata): meta_dir = get_auto_delete_tempdir() tar = tarfile.open(name=opts.metadata, mode='r:*') tar.extractall(path=meta_dir) print 'Using metadata tar file %r, unpacked to directory: %r' % ( opts.metadata, meta_dir) else: raise Exception('metadata source must be a directory or a tar.gz file') scan_snippets(os.path.join(meta_dir, 'header-snippets')) scan_use_defs(os.path.join(meta_dir, 'config-options')) scan_opt_defs(os.path.join(meta_dir, 'feature-options')) scan_use_tags() scan_tags_meta(os.path.join(meta_dir, 'tags.yaml')) print('Scanned %d DUK_OPT_xxx, %d DUK_USE_XXX, %d helper snippets' % \ (len(opt_defs.keys()), len(use_defs.keys()), len(helper_snippets))) #print('Tags: %r' % use_tags_list) if len(args) == 0: raise Exception('missing command') cmd = args[0] if cmd == 'autodetect-header': # Generate a duk_config.h similar to Duktape 1.2 feature detection. 
result = generate_autodetect_duk_config_header(opts, meta_dir) with open(opts.output, 'wb') as f: f.write(result) elif cmd == 'barebones-header': # Generate a duk_config.h with default options for a specific platform, # compiler, and architecture. result = generate_barebones_duk_config_header(opts, meta_dir) with open(opts.output, 'wb') as f: f.write(result) elif cmd == 'feature-documentation': result = generate_feature_option_documentation(opts) with open(opts.output, 'wb') as f: f.write(result) elif cmd == 'config-documentation': result = generate_config_option_documentation(opts) with open(opts.output, 'wb') as f: f.write(result) else: raise Exception('invalid command: %r' % cmd)
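One detail worth isolating from the option setup above: --define, -D, --undefine, -U, --option-yaml, and --option-file all append to the same force_options_yaml list through optparse callbacks, which is exactly what preserves the command-line override order across the different spellings. A stripped-down sketch of that pattern follows; the flag names and the printed tuple layout are illustrative, not the genconfig internals.

# Hedged sketch of the shared-callback pattern: several optparse flags append to
# one list, so overrides are applied in command-line order regardless of spelling.
import optparse

overrides = []

def record(option, opt, value, parser):
    overrides.append((opt, value))  # keep the flag spelling for demonstration

parser = optparse.OptionParser()
for flag in ('--define', '-D', '--undefine', '-U'):
    parser.add_option(flag, type='string', action='callback', callback=record)

opts, args = parser.parse_args(['--define', 'A=1', '-U', 'B', '--define', 'C'])
print(overrides)  # [('--define', 'A=1'), ('-U', 'B'), ('--define', 'C')] -- CLI order kept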
def _downloader_worker_thread(thread_num, q, force, base_url, gsutil, out_q, ret_codes, verbose, extract, delete=True): while True: input_sha1_sum, output_filename = q.get() if input_sha1_sum is None: return extract_dir = None if extract: if not output_filename.endswith('.tar.gz'): out_q.put('%d> Error: %s is not a tar.gz archive.' % (thread_num, output_filename)) ret_codes.put( (1, '%s is not a tar.gz archive.' % (output_filename))) continue extract_dir = output_filename[:-len('.tar.gz')] # Skip the download when a matching local copy (and extraction dir) already exists. if os.path.exists(output_filename) and not force: if not extract or os.path.exists(extract_dir): if get_sha1(output_filename) == input_sha1_sum: continue # Check if the file exists in the remote bucket. file_url = '%s/%s' % (base_url, input_sha1_sum) (code, _, err) = gsutil.check_call('ls', file_url) if code != 0: if code == 404: out_q.put('%d> File %s for %s does not exist, skipping.' % (thread_num, file_url, output_filename)) ret_codes.put((1, 'File %s for %s does not exist.' % (file_url, output_filename))) elif code == 401: out_q.put( """%d> Failed to fetch file %s for %s due to unauthorized access, skipping. Try running `gsutil.py config` and pass 0 if you don't know your project id.""" % (thread_num, file_url, output_filename)) ret_codes.put(( 1, 'Failed to fetch file %s for %s due to unauthorized access.' % (file_url, output_filename))) else: # Other error, probably auth related (bad ~/.boto, etc). out_q.put( '%d> Failed to fetch file %s for %s, skipping. [Err: %s]' % (thread_num, file_url, output_filename, err)) ret_codes.put((1, 'Failed to fetch file %s for %s. [Err: %s]' % (file_url, output_filename, err))) continue # Fetch the file. if verbose: out_q.put('%d> Downloading %s...' % (thread_num, output_filename)) try: if delete: os.remove( output_filename) # Delete the file if it exists already. except OSError: if os.path.exists(output_filename): out_q.put('%d> Warning: deleting %s failed.' % (thread_num, output_filename)) code, _, err = gsutil.check_call('cp', file_url, output_filename) if code != 0: out_q.put('%d> %s' % (thread_num, err)) ret_codes.put((code, err)) continue remote_sha1 = get_sha1(output_filename) if remote_sha1 != input_sha1_sum: msg = ( '%d> ERROR remote sha1 (%s) does not match expected sha1 (%s).' % (thread_num, remote_sha1, input_sha1_sum)) out_q.put(msg) ret_codes.put((20, msg)) continue if extract: if not tarfile.is_tarfile(output_filename): out_q.put('%d> Error: %s is not a tar.gz archive.' % (thread_num, output_filename)) ret_codes.put( (1, '%s is not a tar.gz archive.' % (output_filename))) continue with tarfile.open(output_filename, 'r:gz') as tar: dirname = os.path.dirname(os.path.abspath(output_filename)) # If there are long paths inside the tarball we can get extraction # errors on Windows due to the 260 path length limit (this includes # pwd). Use the extended path syntax. if sys.platform == 'win32': dirname = '\\\\?\\%s' % dirname if not _validate_tar_file(tar, os.path.basename(extract_dir)): out_q.put('%d> Error: %s contains files outside %s.' % (thread_num, output_filename, extract_dir)) ret_codes.put( (1, '%s contains invalid entries.' % (output_filename))) continue if os.path.exists(extract_dir): try: shutil.rmtree(extract_dir) out_q.put('%d> Removed %s...' % (thread_num, extract_dir)) except OSError: out_q.put('%d> Warning: Can\'t delete: %s' % (thread_num, extract_dir)) ret_codes.put((1, 'Can\'t delete %s.'
% (extract_dir))) continue out_q.put('%d> Extracting %d entries from %s to %s' % (thread_num, len( tar.getmembers()), output_filename, extract_dir)) tar.extractall(path=dirname) # Set executable bit. if sys.platform == 'cygwin': # Under cygwin, mark all files as executable. The executable flag in # Google Storage will not be set when uploading from Windows, so if # this script is running under cygwin and we're downloading an # executable, it will be unrunnable from inside cygwin without this. st = os.stat(output_filename) os.chmod(output_filename, st.st_mode | stat.S_IEXEC) elif sys.platform != 'win32': # On non-Windows platforms, key off of the custom header # "x-goog-meta-executable". code, out, err = gsutil.check_call('stat', file_url) if code != 0: out_q.put('%d> %s' % (thread_num, err)) ret_codes.put((code, err)) elif re.search(r'executable:\s*1', out): st = os.stat(output_filename) os.chmod(output_filename, st.st_mode | stat.S_IEXEC)
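_validate_tar_file itself is not shown in this excerpt. A common shape for such a guard, given here as an assumption rather than the actual helper, rejects absolute member names, parent-directory segments, and names outside the expected top-level directory, so extraction cannot escape the destination:

# Hedged sketch of a tar safety check in the spirit of _validate_tar_file above:
# every member must resolve inside the expected top-level directory. This is an
# illustrative stand-in, not the helper the downloader actually calls.
import os
import tarfile

def validate_tar_members(tar, expected_prefix):
    for member in tar.getmembers():
        name = member.name
        if os.path.isabs(name):
            return False                      # absolute paths escape the dest dir
        parts = name.split('/')
        if '..' in parts:
            return False                      # parent references escape the dest dir
        if parts[0] != expected_prefix:
            return False                      # everything must live under the prefix
    return True

A production check also has to treat link members (member.issym() / member.islnk()) with the same suspicion, since their targets can point outside the tree even when the member name itself looks safe.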