Ejemplo n.º 1
0
 def test_zip(self):
     """Check ``is_archive`` on ZIPPATH with and without a format filter."""
     # (keyword arguments for is_archive, expected result), checked in order.
     expectations = (
         ({}, True),
         ({'formats': ('zip', )}, True),
         ({'formats': ('tar', )}, False),
     )
     for extra_kwargs, expected in expectations:
         self.assertEqual(is_archive(ZIPPATH, **extra_kwargs), expected)
Ejemplo n.º 2
0
def is_tarfile(filename):
    """Return ``is_archive(filename)`` restricted to the tar-family formats.

    The formats passed to ``is_archive`` are ``'tar'``, ``'gnu'`` and ``'pax'``.
    """
    tar_family = ('tar', 'gnu', 'pax')
    return is_archive(filename, formats=tar_family)
Ejemplo n.º 3
0
def is_zipfile(filename):
    """Return ``is_archive(filename)`` restricted to the ``'zip'`` format."""
    zip_only = ('zip', )
    return is_archive(filename, formats=zip_only)
Ejemplo n.º 4
0
def is_zipfile(filename):
    """Return ``is_archive(filename)`` restricted to the ``'zip'`` format."""
    zip_only = ('zip', )
    return is_archive(filename, formats=zip_only)
Ejemplo n.º 5
0
 def isarchive(path):
     """Return ``libarchive.is_archive(path)``, with a fallback on failure.

     If ``libarchive.is_archive`` raises, the result of
     ``libarchive.is_archive_name(path)`` is returned instead.
     """
     try:
         return libarchive.is_archive(path)
     except Exception:
         # Narrowed from a bare ``except`` so that KeyboardInterrupt and
         # SystemExit propagate instead of silently triggering the fallback.
         return libarchive.is_archive_name(path)
Ejemplo n.º 6
0
    def run(self):
        """Extract the archive at ``self.file["path"]`` into ``self.extract_path``.

        The archive type is probed in this order:

        1. ZIP, RAR or 7z - handled through the shared
           ``infolist`` / ``namelist`` / ``read`` / ``extractall`` interface;
        2. anything ``libarchive.is_archive`` accepts;
        3. a file whose name ends in ``.gz`` (decompressed as a single file).

        Progress is reported through ``self.on_running`` and the final state
        through ``self.on_success``.  Any ``Exception`` raised on the way is
        caught here and reported through ``self.on_error`` together with its
        traceback; it is not re-raised to the caller.
        """
        try:
            self.preload()
            abs_extract_path = self.get_abs_path(self.extract_path)

            # The destination must exist and must not be a regular file.
            if not os.path.exists(abs_extract_path):
                try:
                    os.makedirs(abs_extract_path)
                except Exception as e:
                    self.logger.error("Cannot create extract path %s. %s" % (str(e), traceback.format_exc()))
                    raise Exception("Cannot create extract path")
            elif os.path.isfile(abs_extract_path):
                raise Exception("Extract path incorrect - file exists")

            abs_archive_path = self.get_abs_path(self.file.get("path"))

            if not os.path.exists(abs_archive_path):
                raise Exception("Archive file is not exist")

            self.on_running(self.status_id, pid=self.pid, pname=self.name)
            self.logger.debug("Start extracting %s", abs_archive_path)

            # for rar and zip same algorithm
            if is_zipfile(abs_archive_path) or rarfile.is_rarfile(abs_archive_path) or SevenZFile.is_7zfile(
                    abs_archive_path):

                if is_zipfile(abs_archive_path):
                    self.logger.info("Archive ZIP type, using zipfile (beget)")
                    a = ZipFile(abs_archive_path)
                elif rarfile.is_rarfile(abs_archive_path):
                    self.logger.info("Archive RAR type, using rarfile")
                    a = rarfile.RarFile(abs_archive_path)
                else:
                    self.logger.info("Archive 7Zip type, using py7zlib")
                    a = SevenZFile(abs_archive_path)

                    # 7z only: entries flagged 'emptystream' are created up
                    # front as zero-length files.
                    for fileinfo in a.archive.header.files.files:
                        if not fileinfo['emptystream']:
                            continue

                        name = fileinfo['filename']
                        try:
                            unicode_name = name.encode('UTF-8').decode('UTF-8')
                        except UnicodeDecodeError:
                            unicode_name = name.encode('cp866').decode('UTF-8')

                        unicode_name = unicode_name.replace('\\', '/')  # For windows name in rar etc.

                        # NOTE(review): entry names come from the archive and
                        # are joined unchecked; a name containing '..' could
                        # resolve outside abs_extract_path - confirm whether
                        # archives are trusted.
                        file_name = os.path.join(abs_extract_path, unicode_name)
                        dir_name = os.path.dirname(file_name)

                        if not os.path.exists(dir_name):
                            os.makedirs(dir_name)
                        # A regular file sitting where a directory is needed
                        # is replaced by the directory.
                        if os.path.exists(dir_name) and not os.path.isdir(dir_name):
                            os.remove(dir_name)
                            os.makedirs(dir_name)
                        if os.path.isdir(file_name):
                            continue

                        # Create (or truncate) the empty file.
                        open(file_name, 'w').close()

                infolist = a.infolist()

                not_ascii = False

                # checking ascii names: any non-ASCII character in the target
                # path or in an entry name selects the manual extraction loop.
                try:
                    abs_extract_path.encode('utf-8').decode('ascii')
                    for name in a.namelist():
                        name.encode('utf-8').decode('ascii')
                except (UnicodeDecodeError, UnicodeEncodeError):
                    not_ascii = True

                # Progress reporting runs in a background thread until
                # extracted_files["done"] is set in the ``finally`` below.
                t = threading.Thread(target=self.progress, args=(infolist, self.extracted_files, abs_extract_path))
                t.daemon = True
                t.start()

                try:
                    if not_ascii:
                        for name in a.namelist():
                            try:
                                unicode_name = name.encode('UTF-8').decode('UTF-8')
                            except UnicodeDecodeError:
                                unicode_name = name.encode('cp866').decode('UTF-8')

                            unicode_name = unicode_name.replace('\\', '/')  # For windows name in rar etc.

                            file_name = os.path.join(abs_extract_path, unicode_name)
                            dir_name = os.path.dirname(file_name)

                            if not os.path.exists(dir_name):
                                os.makedirs(dir_name)
                            if os.path.exists(dir_name) and not os.path.isdir(dir_name):
                                os.remove(dir_name)
                                os.makedirs(dir_name)
                            if os.path.isdir(file_name):
                                continue

                            # ``with`` closes the handle even when a.read()
                            # fails with something other than TypeError.
                            is_directory_entry = False
                            with open(file_name, 'wb') as f:
                                try:
                                    f.write(a.read(name))
                                except TypeError:
                                    # pass for directories its make recursively for files
                                    is_directory_entry = True
                            if is_directory_entry:
                                os.remove(file_name)

                    else:
                        self.logger.info("EXTRACT ALL to %s , encoded = %s" % (
                            pprint.pformat(abs_extract_path), pprint.pformat(abs_extract_path)))
                        a.extractall(abs_extract_path)  # Not working with non-ascii windows folders
                except Exception as e:
                    self.logger.error("Error extract path %s. %s" % (str(e), traceback.format_exc()))
                    raise
                finally:
                    # Tell the progress thread to stop, then wait for it.
                    self.extracted_files["done"] = True
                    t.join()

            elif libarchive.is_archive(abs_archive_path):
                self.logger.info("Archive other type, using libarchive")

                next_tick = time.time() + REQUEST_DELAY
                print(pprint.pformat("Clock = %s ,  tick = %s" % (str(time.time()), str(next_tick))))

                # First pass: collect the entries so the total is known.
                infolist = []
                with libarchive.Archive(abs_archive_path, entry_class=Entry) as a:
                    for entry in a:
                        infolist.append(entry)

                # Second pass: write every entry below the extract path.
                with libarchive.Archive(abs_archive_path, entry_class=Entry) as a:
                    for entry in a:
                        entry_path = os.path.join(abs_extract_path, entry.pathname)
                        self.logger.debug("Entry pathname %s - %s", entry.pathname, entry.size)

                        # Report progress at most once per REQUEST_DELAY.
                        if time.time() > next_tick:
                            ratio = round(float(self.extracted_files["count"]) / float(len(infolist)), 2)
                            progress = {
                                'percent': ratio,
                                'text': str(int(ratio * 100)) + '%'
                            }

                            self.on_running(self.status_id, progress=progress, pid=self.pid, pname=self.name)
                            next_tick = time.time() + REQUEST_DELAY

                        self.extracted_files["count"] += 1
                        dir_name = os.path.dirname(entry_path)

                        if not os.path.exists(dir_name):
                            os.makedirs(dir_name)
                        if os.path.exists(dir_name) and not os.path.isdir(dir_name):
                            os.remove(dir_name)
                            os.makedirs(dir_name)
                        if os.path.isdir(entry_path):
                            continue

                        # Close each output file as soon as the entry has been
                        # written; previously one handle leaked per entry.
                        with open(entry_path, 'w') as f:
                            a.readpath(f)

            elif abs_archive_path[-3:] == ".gz":
                self.logger.info("gz file type, using gzip")
                try:
                    # if its just a gz file
                    with gzip.open(abs_archive_path) as gz_file:
                        file_content = gz_file.read()

                    # The output name is the archive name without its last
                    # extension.
                    file_name = os.path.splitext(os.path.basename(abs_archive_path))[0]
                    file_path = os.path.join(abs_extract_path, file_name)
                    infolist = [file_name]
                    dir_name = os.path.dirname(file_path)

                    if not os.path.exists(dir_name):
                        os.makedirs(dir_name)

                    with open(file_path, 'wb') as extracted:
                        extracted.write(file_content)
                finally:
                    self.extracted_files["done"] = True
            else:
                raise Exception("Archive file has unknown format")

            # NOTE(review): an archive with zero entries makes
            # len(infolist) == 0, so this division raises ZeroDivisionError,
            # which the handler below reports as an error.
            ratio = round(float(self.extracted_files["count"]) / float(len(infolist)), 2)
            progress = {
                'percent': ratio,
                'text': str(int(ratio * 100)) + '%'
            }

            result = {}
            time.sleep(REQUEST_DELAY)
            self.on_success(self.status_id, progress=progress, data=result, pid=self.pid, pname=self.name)

        except Exception as e:
            # Also flag completion here so a running progress thread stops.
            self.extracted_files["done"] = True
            result = {
                "error": True,
                "message": str(e),
                "traceback": traceback.format_exc()
            }

            self.on_error(self.status_id, result, pid=self.pid, pname=self.name)
Ejemplo n.º 7
0
 def test_zip(self):
     """Check ``is_archive`` on ZIPPATH with and without a format filter."""
     # (keyword arguments for is_archive, expected result), checked in order.
     expectations = (
         ({}, True),
         ({'formats': ('zip', )}, True),
         ({'formats': ('tar', )}, False),
     )
     for extra_kwargs, expected in expectations:
         self.assertEqual(is_archive(ZIPPATH, **extra_kwargs), expected)
Ejemplo n.º 8
0
    def run(self):
        """Extract a remote archive: fetch it, unpack locally, push the result.

        Steps, as performed below:

        1. create ``self.folder_for_archive`` and ``self.tmp_dir`` locally;
        2. ensure ``self.extract_path`` exists on the SFTP side and is not a
           file;
        3. copy the archive into ``self.folder_for_archive`` with
           ``sftp.rsync_from``;
        4. unpack it into ``self.tmp_dir`` (ZIP / RAR / 7z, then anything
           ``libarchive.is_archive`` accepts, then a ``.gz`` file by suffix);
        5. send ``self.tmp_dir`` to the extract path with ``sftp.rsync_to``.

        Any ``Exception`` is caught, logged and reported through
        ``self.on_error``; it is not re-raised.  The two local working
        directories are removed in the ``finally`` clause in every case.
        """
        try:
            self.preload()

            # prepare download dir strictly after dropping privileges
            if not os.path.exists(self.folder_for_archive):
                os.makedirs(self.folder_for_archive)
            if not os.path.exists(self.tmp_dir):
                os.makedirs(self.tmp_dir)

            sftp = self.get_sftp_connection(self.session)

            abs_extract_path = self.extract_path

            # The remote destination must exist and must not be a file.
            if not sftp.exists(abs_extract_path):
                try:
                    sftp.makedirs(abs_extract_path)
                except Exception as e:
                    self.logger.error("Cannot create extract path %s. %s" %
                                      (str(e), traceback.format_exc()))
                    raise Exception("Cannot create extract path")
            elif sftp.isfile(abs_extract_path):
                raise Exception("Extract path incorrect - file exists")

            abs_archive_path = self.file.get("path")
            archive_name = os.path.basename(abs_archive_path)
            # copy archive to local fs
            synced_archive_filename = os.path.join(self.folder_for_archive,
                                                   archive_name)
            sftp.rsync_from(abs_archive_path, self.folder_for_archive)

            if not os.path.exists(synced_archive_filename):
                raise Exception("Archive file is not exist")

            self.on_running(self.status_id, pid=self.pid, pname=self.name)
            self.logger.debug("Start extracting %s", abs_archive_path)

            # for rar and zip same algorithm
            if is_zipfile(synced_archive_filename) or\
                    rarfile.is_rarfile(synced_archive_filename) or\
                    SevenZFile.is_7zfile(synced_archive_filename):

                if is_zipfile(synced_archive_filename):
                    self.logger.info("Archive ZIP type, using zipfile (beget)")
                    a = ZipFile(synced_archive_filename)
                elif rarfile.is_rarfile(synced_archive_filename):
                    self.logger.info("Archive RAR type, using rarfile")
                    a = rarfile.RarFile(synced_archive_filename)
                else:
                    self.logger.info("Archive 7Zip type, using py7zlib")
                    a = SevenZFile(synced_archive_filename)

                    # 7z only: entries flagged 'emptystream' are created up
                    # front as zero-length files.
                    for fileinfo in a.archive.header.files.files:
                        if not fileinfo['emptystream']:
                            continue

                        name = fileinfo['filename']
                        try:
                            unicode_name = name.encode('UTF-8').decode('UTF-8')
                        except UnicodeDecodeError:
                            unicode_name = name.encode('cp866').decode('UTF-8')

                        unicode_name = unicode_name.replace(
                            '\\', '/')  # For windows name in rar etc.

                        # NOTE(review): entry names come from the archive and
                        # are joined unchecked; a name containing '..' could
                        # resolve outside self.tmp_dir - confirm whether
                        # archives are trusted.
                        file_name = os.path.join(self.tmp_dir, unicode_name)
                        dir_name = os.path.dirname(file_name)

                        if not os.path.exists(dir_name):
                            os.makedirs(dir_name)
                        # A regular file sitting where a directory is needed
                        # is replaced by the directory.
                        if os.path.exists(
                                dir_name) and not os.path.isdir(dir_name):
                            os.remove(dir_name)
                            os.makedirs(dir_name)
                        if os.path.isdir(file_name):
                            continue

                        # Create (or truncate) the empty file.
                        open(file_name, 'w').close()

                infolist = a.infolist()

                not_ascii = False

                # checking ascii names: any non-ASCII character in the local
                # target path or in an entry name selects the manual loop.
                try:
                    self.tmp_dir.encode('utf-8').decode('ascii')
                    for name in a.namelist():
                        name.encode('utf-8').decode('ascii')
                except (UnicodeDecodeError, UnicodeEncodeError):
                    not_ascii = True

                # NOTE(review): the progress thread receives the remote
                # abs_extract_path while files are written to self.tmp_dir -
                # confirm that self.progress does not inspect that path
                # locally.
                t = threading.Thread(target=self.progress,
                                     args=(infolist, self.extracted_files,
                                           abs_extract_path))
                t.daemon = True
                t.start()

                try:
                    if not_ascii:
                        for name in a.namelist():
                            try:
                                unicode_name = name.encode('UTF-8').decode(
                                    'UTF-8')
                            except UnicodeDecodeError:
                                unicode_name = name.encode('cp866').decode(
                                    'UTF-8')

                            unicode_name = unicode_name.replace(
                                '\\', '/')  # For windows name in rar etc.

                            file_name = os.path.join(self.tmp_dir,
                                                     unicode_name)
                            dir_name = os.path.dirname(file_name)

                            if not os.path.exists(dir_name):
                                os.makedirs(dir_name)
                            if os.path.exists(
                                    dir_name) and not os.path.isdir(dir_name):
                                os.remove(dir_name)
                                os.makedirs(dir_name)
                            if os.path.isdir(file_name):
                                continue

                            # ``with`` closes the handle even when a.read()
                            # fails with something other than TypeError.
                            is_directory_entry = False
                            with open(file_name, 'wb') as f:
                                try:
                                    f.write(a.read(name))
                                except TypeError:
                                    # pass for directories its make recursively for files
                                    is_directory_entry = True
                            if is_directory_entry:
                                os.remove(file_name)

                    else:
                        self.logger.info("EXTRACT ALL to %s , encoded = %s" %
                                         (pprint.pformat(self.tmp_dir),
                                          pprint.pformat(self.tmp_dir)))
                        a.extractall(
                            self.tmp_dir
                        )  # Not working with non-ascii windows folders
                except Exception as e:
                    self.logger.error("Error extract path %s. %s" %
                                      (str(e), traceback.format_exc()))
                    raise
                finally:
                    # Tell the progress thread to stop, then wait for it.
                    self.extracted_files["done"] = True
                    t.join()

            elif libarchive.is_archive(synced_archive_filename):
                self.logger.info("Archive other type, using libarchive")

                next_tick = time.time() + REQUEST_DELAY
                print(
                    pprint.pformat("Clock = %s ,  tick = %s" %
                                   (str(time.time()), str(next_tick))))

                # First pass: collect the entries so the total is known.
                infolist = []
                with libarchive.Archive(synced_archive_filename,
                                        entry_class=Entry) as a:
                    for entry in a:
                        infolist.append(entry)

                # Second pass: write every entry below self.tmp_dir.
                with libarchive.Archive(synced_archive_filename,
                                        entry_class=Entry) as a:
                    for entry in a:
                        entry_path = os.path.join(self.tmp_dir, entry.pathname)
                        self.logger.debug("Entry pathname %s - %s",
                                          entry.pathname, entry.size)

                        # Report progress at most once per REQUEST_DELAY.
                        if time.time() > next_tick:
                            ratio = round(
                                float(self.extracted_files["count"]) /
                                float(len(infolist)), 2)
                            progress = {
                                'percent': ratio,
                                'text': str(int(ratio * 100)) + '%'
                            }

                            self.on_running(self.status_id,
                                            progress=progress,
                                            pid=self.pid,
                                            pname=self.name)
                            next_tick = time.time() + REQUEST_DELAY

                        self.extracted_files["count"] += 1
                        dir_name = os.path.dirname(entry_path)

                        if not os.path.exists(dir_name):
                            os.makedirs(dir_name)
                        if os.path.exists(
                                dir_name) and not os.path.isdir(dir_name):
                            os.remove(dir_name)
                            os.makedirs(dir_name)
                        if os.path.isdir(entry_path):
                            continue

                        # Close each output file as soon as the entry has been
                        # written; previously one handle leaked per entry.
                        with open(entry_path, 'w') as f:
                            a.readpath(f)

            elif abs_archive_path[-3:] == ".gz":
                self.logger.info("gz file type, using gzip")
                try:
                    # if its just a gz file
                    with gzip.open(synced_archive_filename) as gz_file:
                        file_content = gz_file.read()

                    # The output name is the archive name without its last
                    # extension.
                    file_name = os.path.splitext(
                        os.path.basename(synced_archive_filename))[0]
                    file_path = os.path.join(self.tmp_dir, file_name)
                    infolist = [file_name]
                    dir_name = os.path.dirname(file_path)

                    if not os.path.exists(dir_name):
                        os.makedirs(dir_name)

                    with open(file_path, 'wb') as extracted:
                        extracted.write(file_content)
                finally:
                    self.extracted_files["done"] = True
            else:
                raise Exception("Archive file has unknown format")

            # Push the unpacked tree to the remote side, then drop the local
            # copies (the ``finally`` clause below repeats the cleanup for
            # the failure paths).
            sftp.rsync_to(self.tmp_dir + '/.', abs_extract_path)
            os.remove(synced_archive_filename)
            shutil.rmtree(self.tmp_dir)

            # NOTE(review): an archive with zero entries makes
            # len(infolist) == 0, so this division raises ZeroDivisionError,
            # which the handler below reports as an error.
            ratio = round(
                float(self.extracted_files["count"]) / float(len(infolist)), 2)
            progress = {
                'percent': ratio,
                'text': str(int(ratio * 100)) + '%'
            }

            result = {}
            time.sleep(REQUEST_DELAY)
            self.on_success(self.status_id,
                            progress=progress,
                            data=result,
                            pid=self.pid,
                            pname=self.name)

        except Exception as e:
            # Also flag completion here so a running progress thread stops.
            self.extracted_files["done"] = True
            result = {
                "error": True,
                "message": str(e),
                "traceback": traceback.format_exc()
            }
            self.logger.error("SFTP ExtractArchive Error = {}".format(result))

            self.on_error(self.status_id,
                          result,
                          pid=self.pid,
                          pname=self.name)

        finally:
            # Always remove the local working directories.
            if os.path.exists(self.tmp_dir):
                shutil.rmtree(self.tmp_dir)

            if os.path.exists(self.folder_for_archive):
                shutil.rmtree(self.folder_for_archive)
Ejemplo n.º 9
0
 def isarchive(path):
     """Return ``libarchive.is_archive(path)``, with a fallback on failure.

     If ``libarchive.is_archive`` raises, the result of
     ``libarchive.is_archive_name(path)`` is returned instead.
     """
     try:
         return libarchive.is_archive(path)
     except Exception:
         # Narrowed from a bare ``except`` so that KeyboardInterrupt and
         # SystemExit propagate instead of silently triggering the fallback.
         return libarchive.is_archive_name(path)