def _unpack(self, filepath):
    """Determine the vim script's archive type and unpack it into ~/.vim.

    Sets ``self.files`` to the list of regular files contained in the
    archive, then removes the archive file and its temporary directory.

    :param filepath: path to the downloaded archive (.zip, .tar, .tgz,
                     .tar.gz, .tar.bz2/.bz2, or a .vba vimball).
    """
    import os.path
    # Target directory was hard-coded to /home/chris/.vim; resolve the
    # current user's home instead.
    vim_dir = os.path.expanduser('~/.vim')
    root, ext = os.path.splitext(filepath)
    if ext == u'.zip':
        from zipfile import ZipFile
        archive = ZipFile(filepath, 'r')
        # Directory entries end in '/'; keep only real files.
        self.files = [member.filename for member in archive.infolist()
                      if not member.filename.endswith('/')]
        archive.extractall(vim_dir)
        archive.close()
    elif ext in (u'.tar', u'.tgz', u'.bz2') or (
            ext == u'.gz' and os.path.splitext(root)[1] == u'.tar'):
        # tarfile.open auto-detects compression, so a single branch covers
        # .tar, .tgz, .tar.bz2 (ext == '.bz2') and .tar.gz.  The original
        # had two duplicated branches, the second of which could never see
        # a '.bz2' file because the first already matched it.
        import tarfile
        archive = tarfile.open(filepath)
        self.files = [member.name for member in archive.getmembers()
                      if member.isfile()]
        archive.extractall(vim_dir)
        archive.close()
    elif ext == u'.vba':
        # Vimball: let vim itself source and install the script.
        vimExecute(':so %\n:q\n')
        self.files = [filepath]
    # Cleanup: drop the downloaded archive and its temp dir.
    print("Deleting {0} and {1}".format(filepath, os.path.dirname(filepath)))
    os.unlink(filepath)
    os.rmdir(os.path.dirname(filepath))
Beispiel #2
0
def extract_archive(archive_path, extract_to, exclude=None):
    """
    Extract archive (.tar, .zip)

    :param archive_path: Path to archive
    :type archive_path: String|pathlib.Path

    :param extract_to: Path to extraction
    :type extract_to: String|pathlib.Path

    :param exclude: Patterns for files and directories that should not be extracted
                    (just path sub-strings with no wildcards or regexp)
    :type exclude: List

    :raises UnsupportedArchiveError: if the extension is not .tar/.gz/.zip
    """

    archive_path = pathlib.Path(archive_path)
    extract_to = pathlib.Path(extract_to)

    # Pick the right reader for the extension; tarfile covers plain and
    # gzip-compressed tars, zipfile covers .zip.
    if archive_path.suffix == '.tar':
        package = tarfile.open(str(archive_path), 'r')
    elif archive_path.suffix == '.gz':
        package = tarfile.open(str(archive_path), 'r:gz')
    elif archive_path.suffix == '.zip':
        package = ZipFile(str(archive_path))
    else:
        raise UnsupportedArchiveError(
            f"Unsupported archive extension {archive_path.suffix}")

    # Ensure the archive handle is released even if listing or extraction
    # raises (the original leaked it on any exception after open).
    try:
        is_zip = isinstance(package, ZipFile)
        package_data = package.infolist() if is_zip else package.getmembers()

        data_to_extract = None
        if exclude and isinstance(exclude, list):
            # Keep only members whose path contains none of the exclude
            # sub-strings (plain substring match, no wildcards).
            data_to_extract = []
            for member in package_data:
                member_path = member.filename if is_zip else member.name
                if not any(pattern in member_path for pattern in exclude):
                    data_to_extract.append(member)

        # members=None extracts everything.
        package.extractall(extract_to, members=data_to_extract)
    finally:
        package.close()
Beispiel #3
0
	def download(self, input_data_url, output_data_path):
		"""Download *input_data_url* into *output_data_path* (skipping the
		download if the file already exists), unpack it if it is a .zip or
		.tar.gz/.tgz archive, and return the path of the first extracted
		entry (or None when the format is unsupported)."""
		file_name = input_data_url.split('/')[-1]
		output_file_path = os.path.join(output_data_path, file_name)

		if not os.path.exists(output_file_path):

			# Creating output directory if it does not exist
			if not os.path.exists(output_data_path):
				os.makedirs(output_data_path)

			# Downloading the data file
			file_path, _ = urlretrieve(url=input_data_url,
				filename=output_file_path,
				reporthook=self.show_download_progress)

			print('\nDownload completed')
		else:
			print('File already downloaded')

		extracted_archive_path = None

		if output_file_path.endswith('.zip'):

			# Extracting zip archive; the context manager closes the
			# handle, which the original leaked.
			with ZipFile(file=output_file_path, mode='r') as archive:
				archive.extractall(output_data_path)
				extracted_archive_path = os.path.join(output_data_path, archive.infolist()[0].filename)

			print('Archive unpacked')

		elif output_file_path.endswith(('.tar.gz', '.tgz')):

			# Extracting tar-ball; context manager closes the handle.
			with tarfile.open(name=output_file_path, mode='r:gz') as archive:
				archive.extractall(output_data_path)
				extracted_archive_path = os.path.join(output_data_path, archive.getmembers()[0].name)

			print('Archive unpacked')
		else:
			print('Archive file format not supported')

		return extracted_archive_path
Beispiel #4
0
    def clean_dataset_file(self):
        """Perform the cleaning of the dataset_file (the archive file specified by the user).
        This method returns information about the files in the archive in form a
        2-tuple (filesize, stream).

        The returned list will be accessible from the cleaned_data directory.
        """
        import io  # stdlib replacement for six.BytesIO (same class)

        # First try to unzip the archive
        try:
            archive = ZipFile(self.cleaned_data["dataset_file"],
                              allowZip64=True)

            def get_filestream(filename):
                # Materialize the member into an in-memory stream so the
                # archive handle is not needed afterwards, and flatten the
                # archive path into a single file name.
                f = archive.open(filename)
                size = archive.getinfo(filename).file_size
                streamable = io.BytesIO(f.read())
                streamable.name = f.name.replace('/', '_')
                f.close()
                return size, streamable

            def should_include(filename):
                # Bail on directories
                if filename.endswith("/"):
                    return False

                # And on hidden files
                if filename.split("/")[-1].startswith("."):
                    return False

                return True

            files = [
                get_filestream(file_name) for file_name in archive.namelist()
                if should_include(file_name)
            ]

        except zipfile.BadZipfile:
            # Bad zip? Try tar why not
            try:
                self.cleaned_data["dataset_file"].seek(
                    0)  # Reset the file so we can read it again
                archive = tarfile.open(
                    name=None,
                    mode='r',
                    fileobj=self.cleaned_data["dataset_file"])

                def get_filestream(archive_member):
                    xfile = archive.extractfile(archive_member)
                    if xfile is not None:
                        # Strip the leading "./" and flatten the path.
                        xfile.name = xfile.name[2:].replace('/', '_')
                        return xfile.size, xfile
                    else:
                        # Non-regular members (links, devices) have no stream.
                        return 0, None

                def should_include(name):
                    name = name[2:]  # strip the leading "./"
                    if name.endswith("/"):
                        return False

                    # And on hidden files
                    if name.split("/")[-1].startswith("."):
                        return False
                    return True

                files = [
                    get_filestream(member) for member in archive.getmembers()
                    if should_include(member.name)
                ]
                # BUG FIX: the original filtered the tuples against None,
                # which never matched ((0, None) is a tuple, never None) and
                # returned a lazy filter object under Python 3.  Drop the
                # entries whose stream is None instead.
                files = [pair for pair in files if pair[1] is not None]

            except tarfile.TarError:
                raise forms.ValidationError(_(
                    'Not a valid archive file. We currently accept Zip and Tar files.'
                ),
                                            code='invalid')
        return files
Beispiel #5
0
def _relay_extracted(entry_name, data, container_path, module_filter, only_detect, container_is_temp):
    """Scan one archive member: write *data* to a temp file, recurse into
    handle_file, qualify the yielded paths with the member name, and always
    delete the temp file (the original leaked it when scanning raised)."""
    new_path = write_file_to_temp_file(data)
    try:
        for p, r in handle_file(new_path, module_filter, only_detect, is_temp_file=True):
            if container_is_temp:
                result_path = entry_name
            else:
                result_path = container_path + "," + entry_name
            if p is not None:
                result_path += "," + p
            yield result_path, r
    finally:
        remove(new_path)


def handle_file(file_path, module_filter, only_detect, is_temp_file=False):
    """Yield (result_path, result) pairs for *file_path*.

    Archives (zip, tar, rar) are walked recursively, member by member;
    anything else is scanned directly when smaller than 1 GiB.  Extraction
    errors are deliberately best-effort: one corrupt member must not abort
    the whole scan, so they are swallowed.  The bare ``except:`` clauses
    were narrowed to ``except Exception:`` so GeneratorExit and SystemExit
    propagate correctly out of this generator.
    """
    # todo modular archive handling
    # todo PE overlay extraction
    # todo PE resources extraction
    # todo Installer extraction
    if is_zipfile(file_path):
        # extract each file and handle it
        # todo consider adding archive password support
        try:
            z = ZipFile(file_path)
            try:
                for n in z.namelist():
                    for item in _relay_extracted(n, z.read(n), file_path, module_filter, only_detect, is_temp_file):
                        yield item
            finally:
                z.close()  # original leaked the handle
        except KeyboardInterrupt:
            raise
        except Exception:
            pass
    elif tarfile.is_tarfile(file_path):
        try:
            with tarfile.open(file_path, 'r') as z:
                for member in z.getmembers():
                    try:
                        data = z.extractfile(member).read()
                        for item in _relay_extracted(member.name, data, file_path, module_filter, only_detect, is_temp_file):
                            yield item
                    except KeyboardInterrupt:
                        raise
                    except Exception:
                        pass
        except KeyboardInterrupt:
            raise
        except Exception:
            pass
    elif is_rarfile(file_path):
        try:
            z = RarFile(file_path)
            try:
                for n in z.namelist():
                    for item in _relay_extracted(n, z.read(n), file_path, module_filter, only_detect, is_temp_file):
                        yield item
            finally:
                z.close()
        except KeyboardInterrupt:
            raise
        except Exception:
            pass
    else:
        # assume we are dealing with a normal file, skipping anything >= 1 GiB
        # todo Convert file handling to use file paths
        if getsize(file_path) < 1024 * 1024 * 1024:
            with open(file_path, mode='rb') as file_handle:
                file_content = file_handle.read()
                r = scan_file_data(file_content, module_filter, only_detect)
                if r is not None:
                    if is_temp_file:
                        yield None, r
                    else:
                        yield file_path, r
Beispiel #6
0
    def clean_dataset_file(self):
        """Perform the cleaning of the dataset_file (the archive file specified by the user).
        This method returns information about the files in the archive in form a
        2-tuple (filesize, stream).

        The returned list will be accessible from the cleaned_data directory.
        """
        import io  # stdlib stand-in for six.BytesIO (same class)

        # First try to unzip the archive
        try:
            archive = ZipFile(self.cleaned_data["dataset_file"], allowZip64=True)

            def get_filestream(filename):
                # Materialize the member into memory and flatten its path.
                f = archive.open(filename)
                size = archive.getinfo(filename).file_size
                streamable = io.BytesIO(f.read())
                streamable.name = f.name.replace('/', '_')
                f.close()
                return size, streamable

            def should_include(filename):
                # Bail on directories
                if filename.endswith("/"):
                    return False

                # And on hidden files
                if filename.split("/")[-1].startswith("."):
                    return False

                return True

            files = [get_filestream(file_name) for file_name in archive.namelist() if should_include(file_name)]

        except zipfile.BadZipfile:
            # Bad zip? Try tar why not
            try:
                self.cleaned_data["dataset_file"].seek(0)  # Reset the file so we can read it again
                archive = tarfile.open(name=None, mode='r', fileobj=self.cleaned_data["dataset_file"])

                def get_filestream(archive_member):
                    xfile = archive.extractfile(archive_member)
                    if xfile is not None:
                        # Strip the leading "./" and flatten the path.
                        xfile.name = xfile.name[2:].replace('/', '_')
                        return xfile.size, xfile
                    else:
                        # Non-regular members (links, devices) have no stream.
                        return 0, None

                def should_include(name):
                    name = name[2:]  # strip leading "./"
                    if name.endswith("/"):
                        return False

                    # And on hidden files
                    if name.split("/")[-1].startswith("."):
                        return False
                    return True

                files = [get_filestream(member) for member in archive.getmembers() if should_include(member.name)]
                # BUG FIX: the original filtered the tuples against None, which
                # never matched ((0, None) is a tuple, never None) and returned
                # a lazy filter object under Python 3; drop None streams instead.
                files = [pair for pair in files if pair[1] is not None]

            except tarfile.TarError:
                raise forms.ValidationError(_('Not a valid archive file. We currently accept Zip and Tar files.'),
                                            code='invalid')
        return files
Beispiel #7
0
    def loadZip(self, filePaths):
        """
        Compatible with zip, tar and gz extensions
        Read only Red, Near IR and Thermal IR bands
        Get satellite type by looking at the name of the metadata file.
        """

        filepath = filePaths["zip"]
        recognised = False
        bands = {"Error": None}
        for ext in [".tar.gz", ".tar", ".zip", ".gz"]:
            if filepath.lower().endswith(ext):
                recognised = True
        if not (recognised):
            bands["Error"] = "Unknown compressed file format"
            return bands
        self.folder = filepath[:filepath.rfind("/")]

        # Open the container and build a flat list of member NAMES plus an
        # extract callable, so the rest of the method is format-agnostic.
        if filepath.lower().endswith(".zip"):
            compressed = ZipFile(filepath, "r")
            extract = compressed.extract
            listoffiles = compressed.namelist()
        elif filepath.lower().endswith(".gz"):
            compressed = tarfile.open(filepath, "r:gz")
            extract = compressed.extract
            listoffiles = [member.name for member in compressed.getmembers()]
        else:
            compressed = tarfile.open(filepath, "r")
            extract = compressed.extract
            # BUG FIX: the original stored TarInfo objects here, so every
            # filename.upper() call below raised AttributeError for plain
            # .tar archives.  Store member names, as the .gz branch does.
            listoffiles = [member.name for member in compressed.getmembers()]

        # Identify the satellite from the metadata (MTL) file name prefix.
        for filename in listoffiles:
            if filename.upper().endswith("MTL.TXT"):
                if filename[:4] == "LC08":
                    bands["sat_type"] = "Landsat8"
                    sat_type = "Landsat8"
                if filename[:4] == "LT05":
                    bands["sat_type"] = "Landsat5"
                    sat_type = "Landsat5"
        if "sat_type" not in bands:
            bands[
                "Error"] = "Unknown satellite - Please verify that files have not been renamed"
            compressed.close()
            return bands

        # Band-number suffixes per satellite.
        sat_bands = {
            "Landsat5": {
                "Red": "B3",
                "Near-IR": "B4",
                "Thermal-IR": "B6"
            },
            "Landsat8": {
                "Red": "B4",
                "Near-IR": "B5",
                "Thermal-IR": "B10"
            },
        }

        shapefile = None
        if "Shape" in filePaths:
            shapefile = filePaths["Shape"]

        # Extract each wanted band and remember its file name.
        filePaths = dict()
        for band in ("Red", "Near-IR", "Thermal-IR"):
            bands[band] = np.array([])
            for filename in listoffiles:
                if filename.upper().endswith(sat_bands[sat_type][band] +
                                             ".TIF"):
                    extract(filename)
                    filePaths[band] = filename
        compressed.close()
        for band in ("Red", "Near-IR", "Thermal-IR"):
            bands[band] = self.readBand(filePaths[band])

        if shapefile:
            bands["Shape"] = self.readShapeFile(shapefile)
            if type(bands["Shape"]) == str:
                bands["Error"] = bands["Shape"]
                return bands
        return bands
Beispiel #8
0
 def filter_files(self):
     """Build a FileSet (student / solution / base) for a course offering
     from either an SVN export or an uploaded archive, record it on the
     template context, and render the filter-files page.

     NOTE(review): Pylons-style controller action -- it reads everything
     from the thread-local ``request`` and writes to ``c`` and ``session``;
     nothing here is pure.  Python 2 only (``has_key``, ``cmp``,
     ``sorted(cmp=...)``, list-returning ``map``).
     """
     fileset_name = request.params["fileset_name"]
     unique_id = request.params['unique_id']
     course = h.get_object_or_404(Course, id=request.params["course_id"])
     check_course_access(course.id)
     #is_solution = bool(int(request.params["is_solution"]))
     offering = h.get_object_or_404(Offering, id=request.params["offering_id"])
     semester = offering.semester
     # Fileset kind is derived from the semester's concrete type.
     student_fileset = 0
     solution_fileset = 1
     base_fileset = 2
     fileset_type = student_fileset
     if isinstance(semester, SolutionSemester):
         fileset_type = solution_fileset
     elif isinstance(semester, BaseSemester):
         fileset_type = base_fileset
     type = request.params["type"]
     if type == "svn":
         # Export the requested revision from SVN into a temp directory.
         svnroot = str(request.params["svn_url"])
         subdir = str(request.params["svn_subdir"])
         username = str(request.params["svn_username"])
         password = str(request.params["svn_password"])
         rev_time = time.strptime(str(request.params['svn_rev_time']), "%Y-%m-%d %H:%M:%S")
         exported_path = h.svn_export(svnroot, username, password, subdir, rev_time, fileset_type>0,unique_id)
         h.update_session('import_status', "Post-processing files", unique_id)
         fileset = None
         if fileset_type == student_fileset:
             fileset = FileSet()
         elif fileset_type == solution_fileset:
             fileset = SolutionFileSet()
             fileset.isSolutionSet = True
         elif fileset_type == base_fileset:
             fileset = BaseFileSet()
         fileset.name = fileset_name
         fileset.subdir = subdir
         fileset.tempDir = exported_path
         fileset.course = course
         fileset.offering = offering
         fileset.isComplete = False
         c.fileset = fileset
         files = h.lsdir(fileset.tempDir)
         files = map(lambda x: x[len(fileset.tempDir)+1:], files) #chop off temp dir from names
         files.sort()
         c.files = files
     if type == "archive":
         # Save the uploaded archive to disk, then unpack it.
         h.update_session('import_status', "Processing archive file",unique_id)
         subdir = request.params["archive_subdir"]
         myfile = request.POST['file']
         download_path = mkdtemp()
         path =  os.path.join(download_path, myfile.filename.replace(os.sep, '_'))
         permanent_file = open(path,'wb')
         shutil.copyfileobj(myfile.file, permanent_file)
         myfile.file.close()
         permanent_file.close()
         extracted_directory_path = mkdtemp()
         h.update_session('import_status', "Extracting archive file",unique_id)
         if zipfile.is_zipfile(path):
             file = ZipFile(path, 'r')
             h.extract_zip(file, extracted_directory_path)
         if tarfile.is_tarfile(path):
             file = TarFile.open(path, 'r')
             #file.extractall(extracted_directory_path) removed for python 2.4 compat
             for member in file.getmembers():
                 file.extract(member, extracted_directory_path)
         h.update_session('import_status', "Post-processing files",unique_id)
         if fileset_type == student_fileset:
             fileset = FileSet()
         elif fileset_type == solution_fileset:
             fileset = SolutionFileSet()
             fileset.isSolutionSet = True
         elif fileset_type == base_fileset:
             fileset = BaseFileSet()
         fileset.name = fileset_name
         fileset.subdir = subdir
         fileset.tempDir = extracted_directory_path
         fileset.course = course
         fileset.offering = offering
         fileset.isComplete = False
         c.fileset = fileset
         files = h.lsdir(fileset.tempDir)
         files = map(lambda x: x[len(fileset.tempDir)+1:], files) #chop off temp dir from names
         files.sort()
         c.files = files
         shutil.rmtree(download_path)
     Session.commit()


     # Count duplicate basenames so the template can offer the most
     # frequently repeated file names first ("None" is a sentinel choice).
     unique_files_dict = {}
     for filepath in c.files:
         filename = filepath.split("/")[-1]
         if not unique_files_dict.has_key(filename):
             unique_files_dict[filename] = 0
         else:
             unique_files_dict[filename] = unique_files_dict[filename] + 1

     c.unique_files = sorted(unique_files_dict.keys(), cmp=lambda x,y: cmp(unique_files_dict[x], unique_files_dict[y]), reverse=True)
     c.unique_files.insert(0, "None")

     h.del_from_session('import_status', unique_id)
     session.save()
     return render("/derived/import/filter_files.html")