Beispiel #1
1
def extractFiles(indir="/Users/Pratik/Documents/Pratik/Work/practice/py-data-analysis", out="/Users/Pratik/Documents/Pratik/Work/practice/py-data-analysis/extracted"):
    """Extract every ``*.gz`` archive found in ``indir`` into ``out``.

    An archive is skipped when ``out`` already contains an entry whose name
    equals the archive name without its last three characters (``.gz``).

    Args:
        indir: Directory to scan for ``*.gz`` archives. The process working
            directory is changed to it and is not restored.
        out: Directory that receives the extracted files; created if missing.
    """
    os.chdir(indir)                      # archives are globbed relative to indir
    archives = glob.glob("*.gz")
    if not os.path.exists(out):
        os.makedirs(out)                 # os.mkdirs does not exist
    # Look in the real output directory rather than a hard-coded "extracted".
    already_extracted = set(os.listdir(out))
    for archive in archives:
        if archive[:-3] not in already_extracted:
            patoolib.extract_archive(archive, outdir=out)
Beispiel #2
0
def get_starling_data():
    """Download, unpack and arrange the starling atlas data set.

    Returns early when more than one ``*.img`` file is already present in the
    processed delineations folder. Otherwise the zip archive is downloaded,
    extracted with patool, and the atlas ``img``/``hdr`` files plus the
    delineation ``img``/``hdr``/``txt`` files are copied into the processed
    folders.
    """
    if len(glob("../../data/processed/starling/delineations/*.img")) > 1:
        print("Data already download")
        return

    print("Downloading data")
    data_url = "http://uahost.uantwerpen.be/bioimaginglab/starling.zip"
    dl_output = "../../data/raw/starling/"  # raw download location
    img_output = "../../data/processed/starling/"  # processed files location
    delineation_output = img_output + "delineations/"
    zip_loc = dl_output + "starling.zip"

    # make sure every target folder exists before writing into it
    for folder in (dl_output, img_output, delineation_output):
        ensure_dir(folder)

    # fetch the archive and unpack it next to the download
    tqdm_download(data_url, zip_loc)
    patoolib.extract_archive(zip_loc, outdir=dl_output)

    # copy the atlas volumes
    atlas_files = np.concatenate(
        [glob(dl_output + "ATLAS_starling/*." + ext) for ext in ["img", "hdr"]])
    for src in atlas_files:
        shutil.copy(src, img_output + os.path.basename(src))

    # copy the delineation files
    delineation_files = np.concatenate([
        glob(dl_output + "ATLAS_starling/delineations/*." + ext)
        for ext in ["img", "hdr", "txt"]
    ])
    for src in delineation_files:
        shutil.copy(src, delineation_output + os.path.basename(src))
Beispiel #3
0
    def _install_anime4kcpp(self):
        """ Install Anime4KCPP

        Queries the GitHub API for the latest Anime4KCPP release, downloads
        every asset whose URL matches the Win64 MSVC CLI 7z pattern into the
        system temp directory, and extracts the last one downloaded into
        ``LOCALAPPDATA / 'video2x' / 'anime4kcpp'`` (replacing any existing
        folder). Downloaded archives are appended to ``self.trash``.

        Raises:
            RuntimeError: if the release has no matching asset.
        """
        print('\nInstalling Anime4KCPP')

        import patoolib
        import requests

        # get latest release of Anime4KCPP via Github API
        latest_release = requests.get(
            'https://api.github.com/repos/TianZerL/Anime4KCPP/releases/latest'
        ).json()

        anime4kcpp_7z = None
        for a in latest_release['assets']:
            if re.search(r'Anime4KCPP_CLI-.*-Win64-msvc\.7z',
                         a['browser_download_url']):
                anime4kcpp_7z = download(a['browser_download_url'],
                                         tempfile.gettempdir())
                self.trash.append(anime4kcpp_7z)

        # Without this guard a release lacking the expected asset would
        # surface later as an opaque unbound-variable error.
        if anime4kcpp_7z is None:
            raise RuntimeError(
                'No Anime4KCPP Win64 MSVC CLI archive found in latest release')

        # if running in PyInstaller, add sys._MEIPASS\7z to path
        # this directory contains 7za.exe and its DLL files
        with contextlib.suppress(AttributeError):
            os.environ['PATH'] += f';{sys._MEIPASS}\\7z'

        install_dir = LOCALAPPDATA / 'video2x' / 'anime4kcpp'
        if install_dir.exists():
            shutil.rmtree(install_dir)
        patoolib.extract_archive(str(anime4kcpp_7z), outdir=str(install_dir))
def extract_archive_data(archive_data, archive_name, folder):
    """Extract and save data from compressed folder from config map.

    The base64 payload is decoded into a file named ``archive_name``, that
    file is extracted into ``folder`` with patool, and the archive file is
    then removed via ``remove_file``. All failures are logged, not raised.

    Args:
        archive_data (str): Base64 encoded jsonnet folder (archive).
        archive_name (str): Archive name (needed for extensions).
        folder (str): Folder that will be created and archive extracted to.

    Returns:
        None
    """
    try:
        if not os.path.exists(folder):
            os.mkdir(folder)
    except OSError as e:
        log.error(f"Error when creating folder {folder}, error: {e}")
        return

    try:
        with open(archive_name, "wb") as f:
            f.write(base64.b64decode(archive_data))
    except (binascii.Error, IOError) as e:
        log.error(f"Error when decoding {archive_name}, error: {e}")
        # Nothing usable was written, so skip extraction (it would only log
        # a misleading second error) but still clean up the archive file.
        remove_file("./", archive_name)
        return

    try:
        patoolib.extract_archive(archive_name, outdir=folder, verbosity=-1)
        log.info(f"File {archive_name} extracted to {folder}")
    except patoolib.util.PatoolError as e:
        log.error(f"Error when extracting {archive_name}, error: {e}")

    remove_file("./", archive_name)
Beispiel #5
0
def cbzgenerator(namefile, origen):
    """Repack the archive ``namefile`` as a flat ``.cbz.new`` zip file.

    The archive is extracted into a ``temporal`` folder next to it, the
    source is renamed with an ``.extraido`` suffix, and every extracted path
    matching ``*.*`` is written (sorted, without its directory part) into
    ``<name>.cbz.new``. The temporary folder is removed afterwards.

    If extraction fails, the error is appended to ``origen/cbrconverter.log``,
    the temporary folder is removed and the function returns without
    renaming the source or creating a cbz.

    Args:
        namefile: Path of the archive to convert.
        origen: Directory that holds the ``cbrconverter.log`` file.
    """
    logfile = origen + '/cbrconverter.log'
    parents, filename = os.path.split(namefile)
    temporal = parents + '/temporal'
    try:
        os.mkdir(temporal)
    except OSError:
        print("Creation of the directory %s failed" % temporal)
    print(namefile)
    try:
        patoolib.extract_archive(namefile, outdir=temporal)
    except Exception:
        with open(logfile, "a") as f:
            f.write("Error descomprimiendo: " + namefile + '\n')
        try:
            shutil.rmtree(temporal)
        except OSError:
            print('Error while deleting directory')
        # Stop here: continuing would mark the source as extracted and
        # produce an empty cbz from a folder that no longer exists.
        return
    os.rename(namefile, namefile + ".extraido")
    archivos = sorted(glob.glob(temporal + '/**/*.*', recursive=True))

    filename2, _ = os.path.splitext(filename)
    cbz = parents + '/' + filename2 + '.cbz.new'
    with ZipFile(cbz, 'w') as zipobje:
        for archivos2 in archivos:
            datemodified(archivos2)
            # store entries flat, by file name only
            zipobje.write(archivos2, os.path.basename(archivos2))
    try:
        shutil.rmtree(temporal)
    except OSError:
        print('Error while deleting directory')
Beispiel #6
0
def _download_and_unpack(url, filepath, datadir):
    """Stream ``url`` to ``filepath`` and extract it into ``datadir``."""
    print('Downloading file', url, filepath)
    response = requests.get(url, stream=True)
    # Fail on HTTP errors instead of saving an error page as the archive.
    response.raise_for_status()
    with open(filepath, "wb") as handle:
        for data in response.iter_content(chunk_size=1024):
            if data:
                handle.write(data)

    print('Unpacking file', filepath)
    patoolib.extract_archive(filepath, outdir=datadir)


def run():
    """Download and unpack the Natura 2000 sites and tabular archives.

    The target directory is read from the ``NATURA_DATA_DIRECTORY``
    environment variable; when it is unset or empty a message is printed and
    nothing is downloaded.
    """
    # Get data directory from environment
    datadir = os.environ.get('NATURA_DATA_DIRECTORY', '')
    if not datadir:
        print('Datadir not found, please specify NATURA_DATA_DIRECTORY env var.')
        return

    # Get geographic files
    url = const.NATURA2000_SITES_SOURCE
    _download_and_unpack(url, os.path.join(datadir, os.path.basename(url)), datadir)

    # Get tabular data
    _download_and_unpack(
        const.NATURA2000_TABLE_SOURCE,
        os.path.join(datadir, 'natura2000_tabular.zip'),
        datadir,
    )
def ftpDownloader(Id,
                  startID,
                  endID,
                  url="<url here>",
                  user="******",
                  passwd="<password here>"):
    """Download a numbered range of files over FTP and unpack archives.

    For every index in ``startID..endID`` (inclusive) a remote path is built
    from the path template, the file is downloaded into ``pathname`` and, if
    its name ends in ``.gz``, ``.zip`` or ``.tar``, extracted into ``unpack``.
    Files the server refuses (``error_perm``) are reported and the local
    placeholder file is deleted.

    NOTE(review): ``pathname`` is not defined in this function; it is assumed
    to be a module-level variable - confirm.

    Args:
        Id: Identifier substituted into the remote path template.
        startID: First index of the range.
        endID: Last index of the range (inclusive).
        url: FTP host.
        user: FTP user name.
        passwd: FTP password.
    """
    ftp = FTP(url)
    try:
        ftp.login(user, passwd)
        if not os.path.exists(pathname):
            os.makedirs(pathname)
        print(ftp.nlst())

        ftp.cwd("<ftp working durectory here>")
        os.chdir(pathname)

        for array in range(startID, endID + 1):
            #Enter full path below, including start and stop IDs
            fullpath = '<insert ftp path here>' % (array, Id, array)
            filename = os.path.basename(fullpath)
            try:
                with open(filename, 'wb') as file:
                    ftp.retrbinary('RETR %s' % fullpath, file.write)
            except error_perm:
                print("%s is not available" % filename)
                os.remove(filename)
                continue
            print("%s downloaded" % filename)
            # Extract only after the file is closed, so all downloaded bytes
            # are flushed to disk before the extractor reads them.
            if filename.endswith((".gz", ".zip", ".tar")):
                patoolib.extract_archive(filename, outdir="unpack")
    finally:
        # Release the connection even when a download or extraction fails.
        ftp.close()
def main():
    """Extract an archive into a prefixed folder, asking via dialogs if needed.

    The archive path comes from ``sys.argv[1]`` or a file-open dialog when no
    argument is given. The destination comes from ``sys.argv[2]`` when exactly
    two arguments are given, otherwise from a directory dialog that starts in
    the archive's folder. Files are extracted into
    ``<destination>/<DEST_FOLDER_PREFIX><archive name without last extension>``
    and the interpreter then exits.
    """
    root_window = tkinter.Tk()
    root_window.withdraw()  # only the dialogs are wanted, not the root window

    argc = len(sys.argv)
    if argc == 1:
        archivepath = askopenfilename(title="Selecteaza arhiva")
    else:
        archivepath = sys.argv[1]

    archive_dir, archive_file = os.path.split(archivepath)
    # everything before the last dot ('' when the name has no dot)
    stem = archive_file.rpartition('.')[0]

    if argc == 3:
        destinationPath = sys.argv[2]
    else:
        destinationPath = askdirectory(
            title="Selecteaza directorul unde se vor dezarhiva fisierele.",
            initialdir=archive_dir)

    destFilesPath = os.path.join(destinationPath, DEST_FOLDER_PREFIX + stem)
    if not os.path.exists(destFilesPath):
        os.makedirs(destFilesPath)
    patoolib.extract_archive(archivepath, outdir=destFilesPath)
    exit()
Beispiel #9
0
def extract_archive(from_path, to_path=None, remove_finished=False):
    """Extract ``from_path`` into ``to_path`` based on the detected format.

    Tar variants go through ``tarfile``, plain gzip through ``gzip``, zip
    through ``zipfile`` and rar through patool.

    Args:
        from_path: Path of the archive.
        to_path: Destination directory; defaults to the archive's directory.
        remove_finished: When true, delete the archive after extraction.

    Raises:
        ValueError: if none of the format checks recognises ``from_path``.
    """
    if to_path is None:
        to_path = os.path.dirname(from_path)

    def _untar(mode):
        # shared tar extraction for the three tar flavours
        with tarfile.open(from_path, mode) as tar:
            tar.extractall(path=to_path)

    if _is_tar(from_path):
        _untar('r')
    elif _is_targz(from_path):
        _untar('r:gz')
    elif _is_tarxz(from_path) and PY3:
        # .tar.xz archive only supported in Python 3.x
        _untar('r:xz')
    elif _is_gzip(from_path):
        # a bare .gz holds one file: write it under its name minus extension
        unpacked_name = os.path.splitext(os.path.basename(from_path))[0]
        to_path = os.path.join(to_path, unpacked_name)
        with open(to_path, "wb") as out_f, gzip.GzipFile(from_path) as zip_f:
            out_f.write(zip_f.read())
    elif _is_zip(from_path):
        with zipfile.ZipFile(from_path, 'r') as z:
            z.extractall(to_path)
    elif _is_rar(from_path):
        patoolib.extract_archive(from_path, outdir=to_path)
    else:
        raise ValueError("Extraction of {} not supported".format(from_path))

    if remove_finished:
        os.remove(from_path)
Beispiel #10
0
def unpack(input_file, output_folder):
    """
    Unpack ``input_file`` when its leading bytes identify a known archive type.

    :param input_file: file to unpack
    :param output_folder: folder to save file unpacked
    :return: True when the file was recognised and extracted; False when it is
        a directory, not a recognised archive type, or extraction failed

    """
    print(input_file)
    if os.path.isdir(input_file):
        return False

    # identify the file type from its first 128 bytes
    with open(input_file, "rb") as file:
        info = fleep.get(file.read(128))

    extensions_supported = {'rar', '7z', 'dmg', 'gz', 'iso', 'tar.z', 'zip'}

    print(info.extension)

    if not info.extension:
        return False

    recognised = set(info.extension) & extensions_supported
    if not recognised:
        return False

    print(recognised)
    print('---> File recognized, unpacking <---')
    try:
        patoolib.extract_archive(input_file, outdir=output_folder)
    except PatoolError:
        # probably a false positive
        return False
    return True
Beispiel #11
0
def decompress_file(file, dir, directories='strip'):
    """Extract archive `file` into directory `dir` using patool.

    With ``directories='strip'`` (the only supported value), if the extracted
    content consists of exactly one sub-directory and no files at the top
    level of `dir`, that sub-directory's entries are moved up one level.

    Raises:
        NotImplementedError: for any other `directories` value. Note that it
            is raised after the archive has already been extracted.

    NOTE(review): `.iteritems()` and `.next()` are Python 2 idioms; this
    block presumably targets Python 2 - confirm before running on Python 3.
    """
    fullcmd = None
    # `cmd % locals()` fills the command template from this function's local
    # variables, so the local names here (file, dir, ...) must not be renamed.
    # The resulting `fullcmd` is only consumed by the commented-out code below.
    for ptr, cmd in DECOMPRESSORS.iteritems():
        if re.search(ptr, file):
            fullcmd = cmd % locals()
            break
#    if fullcmd is not None:
#        lgr.debug("Extracting file: %s" % fullcmd)
#        status, output = getstatusoutput(fullcmd)  # getstatusoutput is deprecated. Use cmd.Runner.run() instead.
#        if status:
#            lgr.debug("Failed to extract: status %d output %s" % (status, output))
#    else:
    #lgr.debug("Have no clue how to extract %s -- using patool" % file)
    verbosity = -1                        # silent by default
    ef_level = lgr.getEffectiveLevel() 
    # become verbose only when the logger has a non-zero level at or below DEBUG
    if ef_level and lgr.getEffectiveLevel() <= logging.DEBUG:
        verbosity = 1
    #elif lgr.getEffectiveLevel() <= logging.INFO:
    #    verbosity = 0
    patoolib.extract_archive(file, outdir=dir, verbosity=verbosity)
    if directories == 'strip':
        # first os.walk() entry describes the top level of `dir`
        _, dirs, files = os.walk(dir).next()
        if not len(files) and len(dirs) == 1:
            # move all the content under dirs[0] up 1 level
            subdir, subdirs_, files_ = os.walk(join(dir, dirs[0])).next()
            for f in subdirs_ + files_:
                os.rename(join(subdir, f), join(dir, f))
    else:
        raise NotImplementedError("Not supported %s" % directories)
Beispiel #12
0
def extra_file(file_path):
    """Extract ``file_path`` into the directory part of its own path.

    The directory is everything before the last ``os.path.sep`` in
    ``file_path``; it is created when missing.

    Returns:
        The directory the archive was extracted into.
    """
    last_sep = file_path.rfind(os.path.sep)
    file_extra_dir = file_path[:last_sep]
    print("file_extra_dir: ", file_extra_dir)
    if os.path.exists(file_extra_dir) is False:
        os.mkdir(file_extra_dir)
    patoolib.extract_archive(file_path, outdir=file_extra_dir)
    return file_extra_dir
Beispiel #13
0
def writing_to_BD():
    """Download the XMLTV guide, unpack it and store its content in the DB.

    The gzip archive is saved next to this source file and extracted there;
    the working directory is changed to that folder. For each ``channel``
    element a ``BD.Channel`` row is added when its id is new. For each
    ``programme`` element a ``BD.Telecast`` and a ``BD.Genre`` row are added
    when their names are new, followed by a ``BD.TVprogram`` row. Everything
    is committed once at the end.

    NOTE(review): the code reads ``elem[0].text`` as the telecast name and
    ``elem[1].text`` as the genre / channel name - this relies on child
    element order in the feed; confirm against the XML.
    """
    url = 'http://programtv.ru/xmltv.xml.gz'
    response = requests.get(url)

    base_dir = os.path.dirname(os.path.realpath(__file__))
    # os.path.join instead of a hard-coded '\\' so this also works off Windows
    archive_path = os.path.join(base_dir, 'xmltv.xml.gz')
    with open(archive_path, 'wb') as xml_gz:
        xml_gz.write(response.content)

    os.chdir(base_dir)
    patoolib.extract_archive(archive_path, outdir=base_dir)
    with open("xmltv.xml", encoding='utf-8') as fobj:
        xml = fobj.read().encode('utf-8')

    root = etree.fromstring(xml)

    for elem in root:
        if elem.tag == "channel":
            num_of_channels = session.query(BD.Channel.name).filter(
                BD.Channel.id_channel == elem.get('id')).count()
            if num_of_channels == 0:
                table_channel = BD.Channel(id_channel=elem.get('id'),
                                           name=elem[1].text)
                session.add(table_channel)

        if elem.tag == "programme":
            num_of_id_telecasts = session.query(BD.Telecast.id).filter(
                BD.Telecast.name == elem[0].text).count()
            if num_of_id_telecasts == 0:
                table_telecast = BD.Telecast(name=elem[0].text)
                session.add(table_telecast)

            # .count() was missing: the Query object itself never equals 0,
            # so new genres were never inserted.
            num_of_id_genres = session.query(
                BD.Genre.id).filter(BD.Genre.name == elem[1].text).count()
            if num_of_id_genres == 0:
                table_genre = BD.Genre(name=elem[1].text)
                session.add(table_genre)

            start = parser.parse(elem.get('start'))
            start = start.strftime("%Y-%m-%d %H:%M:%S")

            end = parser.parse(elem.get('stop'))
            end = end.strftime("%Y-%m-%d %H:%M:%S")

            # NOTE(review): .first() returns a result row rather than a bare
            # id - confirm that BD.TVprogram accepts it as-is.
            id_telecast = session.query(BD.Telecast.id).filter(
                BD.Telecast.name == elem[0].text).first()

            table_tvprogram = BD.TVprogram(channel=elem.get('channel'),
                                           telecast=id_telecast,
                                           start_time=start,
                                           end_time=end)
            session.add(table_tvprogram)
    session.commit()
Beispiel #14
0
def extract_rar_data(source, target, extract_src = False):
    """Extract rar archives into ``target``, including nested ones.

    Args:
        source: Directory whose top-level ``.rar`` files are extracted into
            ``target`` first (only when ``extract_src`` is true).
        target: Destination directory. Every ``.rar`` found anywhere below it
            is then extracted into ``target`` as well.
        extract_src: Whether to process ``source`` before scanning ``target``.
    """
    if extract_src:
        for file in os.listdir(source):
            if file.endswith(".rar"):
                # os.path.join works whether or not source ends with a separator
                patoolib.extract_archive(os.path.join(source, file), outdir = target)
    allrar = [y for x in os.walk(target) for y in glob(os.path.join(x[0], '*.rar'))]
    for file in allrar:
        patoolib.extract_archive(file, outdir = target)
Beispiel #15
0
		def uncompressFile(self,from_location,to_location):
			"""Extract a ``.zip`` or ``.rar`` archive into ``to_location``.

			Zip files are handled with ``zipfile``, rar files with patool.
			Paths with any other extension are ignored.
			"""
			if from_location.endswith(".zip"):
				# context manager closes the archive handle after extraction
				with zipfile.ZipFile(from_location) as zfile:
					zfile.extractall(to_location)
				print("uncompressing:" + from_location)

			elif from_location.endswith(".rar"):
				patoolib.extract_archive(from_location, outdir=to_location)
Beispiel #16
0
    def extract_to_dir(archive_path, to_dir):
        """Unpack an archive into a directory without prompting.

        archive_path (string): location of the archive to unpack.
        to_dir (string): directory that receives the archive content.

        """
        options = {"outdir": to_dir, "interactive": False}
        patoolib.extract_archive(archive_path, **options)
Beispiel #17
0
 def _extract(self):
     """Extract ``self.a_path`` into ``self.e_path``, creating it if needed.

     Any exception is caught and reported on stdout instead of propagating.
     """
     try:
         destination_missing = not os.path.isdir(self.e_path)
         if destination_missing:
             os.mkdir(self.e_path)
         patoolib.extract_archive(self.a_path, outdir=self.e_path)
     except Exception as e:
         print(f"Failed: {str(e)}")
     else:
         print("FILES EXTRACTED")
Beispiel #18
0
def uncompressFile(from_location, to_location):
    """Extract a ``.zip`` or ``.rar`` archive into ``to_location``.

    Zip files are handled with ``zipfile``, rar files with patool. Paths
    with any other extension are ignored.
    """
    if from_location.endswith(".zip"):
        # context manager closes the archive handle after extraction
        with zipfile.ZipFile(from_location) as zfile:
            zfile.extractall(to_location)
        print("uncompressing:" + from_location)

    elif from_location.endswith(".rar"):
        patoolib.extract_archive(from_location, outdir=to_location)
Beispiel #19
0
    def extract_to_dir(archive_path, to_dir):
        """Unpack an archive into a directory.

        archive_path (string): location of the archive to unpack.
        to_dir (string): directory that receives the archive content.

        """
        destination = to_dir
        patoolib.extract_archive(archive_path, outdir=destination)
 def unpack_photo_archive(self, rar_url):
     """Extract ``rar_url`` into the ``archive`` folder under ``self.storage``.

     The folder is created; when it already exists it is passed to
     ``self.clean_directory`` instead. Returns the folder path.
     """
     target = Path(self.storage) / r'archive'
     try:
         os.mkdir(target)
     except FileExistsError:
         self.clean_directory(target)
     patoolib.extract_archive(rar_url, outdir=target)
     return target
Beispiel #21
0
def convert_rar_to_txt(download_path):
    """Extract an archive next to itself and convert its documents to text.

    The archive is extracted into a folder named like the archive without its
    last extension (only when that folder is empty). If the folder then holds
    a single sub-directory, its files are moved up and it is removed. Finally
    each top-level file is dispatched by extension: images trigger one
    directory-wide merge and stop the scan, doc/docx and pdf files are
    converted when ``need_to_save`` agrees, txt files are skipped and
    anything else is logged.
    """
    # extract into a folder named after the archive
    output_dir = download_path[:download_path.rfind('.')]
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    # only extract when the folder is still empty
    if len(os.listdir(output_dir)) < 1:
        patoolib.extract_archive(download_path, outdir=output_dir)

    # detect a single redundant wrapper directory
    if len(os.listdir(output_dir)) == 1:
        for entry in os.listdir(output_dir):
            if entry.endswith('.DS_Store'):
                continue
            wrapper_dir = os.path.join(output_dir, entry)
            if not os.path.isdir(wrapper_dir):
                continue
            # move the wrapped files up one level, then drop the wrapper
            for inner_name in os.listdir(wrapper_dir):
                if inner_name.endswith('.DS_Store'):
                    continue
                shutil.move(os.path.join(wrapper_dir, inner_name),
                            os.path.join(output_dir, inner_name))
            os.removedirs(wrapper_dir)

    # dispatch each extracted file by type
    logging.info(output_dir)
    image_suffixes = ('.tif', '.jpg', '.png')
    for name in os.listdir(output_dir):
        full_path = os.path.join(output_dir, name)
        if not os.path.isfile(full_path):
            continue
        if name.endswith('.DS_Store'):
            continue
        if name.endswith(image_suffixes):
            # images are merged and converted per directory, so stop here
            merge_dir_pic_to_txt(output_dir)
            break
        if name.endswith('.doc') or name.endswith('.docx'):
            if need_to_save(name):
                convert_doc_to_txt(full_path)
        elif name.endswith('.pdf'):
            if need_to_save(name):
                convert_pdf_to_txt(full_path)
        elif name.endswith('.txt'):
            continue
        else:
            logging.info('解压出来的文件不知道怎么处理 %s/%s' % (output_dir, name))

    logging.info('')
    return
Beispiel #22
0
def _analyze_compressed_file(parent, node, path, nesting_level):
    """Fill ``node`` with metadata for ``path`` and recurse into archives.

    Stores file name, MIME type, size, MD5, SHA-1, ssdeep fuzzy hash, nesting
    level and the parent's SHA-1 in ``node``. Then tries to extract ``path``
    into a temporary directory and analyses every extracted top-level file
    recursively, appending the results to ``node['compressed_children']``.
    Extraction failures are ignored: every file is tried as an archive.

    Args:
        parent: Node dict of the containing archive, or None at the top level.
        node: Dict to populate (mutated in place).
        path: Path of the file to analyse.
        nesting_level: Depth of the parent; this node gets ``nesting_level + 1``.
    """
    m_type = mime.from_file(path)
    size = os.path.getsize(path)

    m = md5()
    s = sha1()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            m.update(chunk)
            s.update(chunk)

    node['filename'] = os.path.basename(path)
    node['mime_type'] = m_type
    node['size'] = size
    node['md5'] = m.hexdigest()
    node['sha1'] = s.hexdigest()
    node['fuzzy'] = ssdeep.hash_from_file(path)  # computed once, not twice
    node['nesting_level'] = nesting_level + 1
    node['parent_hash'] = None if parent is None else parent.get('sha1')
    node['compressed_children'] = []

    # If this is a compressed file, analyze it recursively: extract into a new
    # temporary directory, hash the content, then delete the extracted files.
    tmpdir = tempfile.mkdtemp()
    try:
        # Brute force approach: the mime type is not checked, every file is
        # tried as an archive.
        patoolib.extract_archive(path, outdir=tmpdir)

        files = [
            os.path.join(tmpdir, f) for f in os.listdir(tmpdir)
            if os.path.isfile(os.path.join(tmpdir, f))
        ]
        for f in files:
            child = dict()
            _analyze_compressed_file(parent=node,
                                     node=child,
                                     path=f,
                                     nesting_level=nesting_level + 1)
            node['compressed_children'].append(child)
    except Exception:
        # Best effort: most files are not archives. Catching Exception rather
        # than everything lets KeyboardInterrupt/SystemExit propagate.
        pass
    finally:
        # Remove the temporary file directory
        shutil.rmtree(tmpdir)
Beispiel #23
0
def extractFiles(indir = "weather-data", out="extracted"):
    """Extract every ``*.gz`` archive found in ``indir`` into ``out``.

    An archive is skipped when ``out`` already contains an entry whose name
    equals the archive name without its last three characters (``.gz``).

    Args:
        indir: Directory to scan. The process working directory is changed to
            it and not restored, so a relative ``out`` resolves inside it.
        out: Directory that receives the extracted files; created if missing.
    """
    os.chdir(indir)
    archives = glob.glob("*.gz")
    if not os.path.exists(out):
        os.makedirs(out)
    # Look in the real output directory rather than a hard-coded "extracted".
    already_extracted = set(os.listdir(out))
    for archive in archives:
        if archive[:-3] not in already_extracted:
            patoolib.extract_archive(archive, outdir = out)
Beispiel #24
0
def download_extract_all(urls, dir_save, extract_data=True, verbose=True):
    """Download a set of urls and optionally unpack them on disk.

    Each url is parsed with ``parse_url`` and stored under
    ``dir_save/<extract_dir>/<filename>``. A url whose target file already
    exists is skipped entirely (no download, checksum or extraction).

    Parameters
    ----------
    urls : str/list/tuple
        A single URL or a list/tuple of URL entries.
    dir_save : str
        Directory to store the downloaded data (created if missing).
    extract_data : bool, optional
        Extracts/unpacks the data files (if true).
    verbose : bool, optional
        Display messages on screen if set to True.

    Raises
    ------
    Exception
        If a download reports failure.

    """
    # accept a single url string as well as a collection
    url_entries = [urls] if isinstance(urls, str) else urls

    if not os.path.exists(dir_save):
        os.makedirs(dir_save)

    for position, entry in enumerate(url_entries, start=1):
        if verbose:
            print('\nDownload url ({}/{}): {}'.format(position, len(url_entries), entry))

        url, md5hash, filename, extract_dir, method = parse_url(entry)

        save_dir = os.path.join(dir_save, extract_dir)
        filename = os.path.join(save_dir, filename)

        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        if os.path.exists(filename):
            print('File already exists, skip downloading this url.')
            continue

        status, err = download_url(url, filename, method, verbose)
        if not status:
            raise Exception(err)

        if md5hash:
            md5_checksum(filename, md5hash)
        if extract_data:
            patoolib.extract_archive(filename,
                                     outdir=save_dir,
                                     verbosity=verbose)
Beispiel #25
0
 def decompress_to_temp(self, file):
     """Extract ``file`` into ``self.TEMPDIR``.

     Returns True on success. On any exception the failure is printed,
     recorded through ``self.logfile.log_error`` and False is returned.
     """
     try:
         patoolib.extract_archive(archive=os.path.join(file), verbosity=0, outdir=self.TEMPDIR)
     except Exception as e:
         reason = str(e)
         print('Decompression failed. Exception Thrown: ' + reason)
         self.logfile.log_error('Decompression failed. Exception Thrown: ' + file + reason)
         return False
     return True
Beispiel #26
0
    def unpack(self):
        """Unpack the archive at self.path into a fresh temporary directory.

        The directory is created under config.temp_dir and remembered in
        self.temp_dir.

        return (string): the path of the temporary directory.

        """
        self.temp_dir = tempfile.mkdtemp(dir=config.temp_dir)
        destination = self.temp_dir
        patoolib.extract_archive(self.path, outdir=destination)
        return destination
Beispiel #27
0
def extractFiles(indir='/home/oleh/PYDATA', out='/home/oleh/PYDATA/Extracted'):
    """Extract every ``*.gz`` archive found in ``indir`` into ``out``.

    An archive is skipped when ``out`` already contains an entry whose name
    equals the archive name without its last three characters (``.gz``).

    Args:
        indir: Directory to scan. The process working directory is changed to
            it and not restored.
        out: Directory that receives the extracted files; created if missing.
    """
    os.chdir(indir)
    archives = glob.glob("*.gz")
    if not os.path.exists(out):
        os.makedirs(out)
    # Look in the real output directory rather than a hard-coded "Extracted".
    already_extracted = set(os.listdir(out))
    for archive in archives:
        if archive[:-3] not in already_extracted:
            patoolib.extract_archive(archive, outdir=out)
Beispiel #28
0
def rar_cvrt():
    """Extract every ``.rar`` in ``PARENT_FOLDER/testrar`` into its own folder.

    Each archive is unpacked into a sibling directory named like the archive
    path without its last extension; the directory is created when missing.
    """
    test_folder = os.path.join(PARENT_FOLDER, 'testrar')
    rar_names = [name for name in os.listdir(test_folder) if name.endswith('.rar')]
    for rar_name in rar_names:
        rar_path = os.path.join(test_folder, rar_name)
        extract_dir = rar_path[:rar_path.rfind('.')]
        if not os.path.exists(extract_dir):
            os.mkdir(extract_dir)
        patoolib.extract_archive(rar_path, outdir=extract_dir)
Beispiel #29
0
    def unpack(self):
        """Unpack the archive at self.path into a fresh temporary directory.

        The directory is created under config.temp_dir and remembered in
        self.temp_dir.

        return (string): the path of the temporary directory.

        """
        self.temp_dir = tempfile.mkdtemp(dir=config.temp_dir)
        destination = self.temp_dir
        patoolib.extract_archive(self.path, outdir=destination)
        return destination
Beispiel #30
0
    def ExtractRarFile(self, RarFileName, destPath):
        """Extract an archive into a destination directory via patool.

        Args:
            self: The instance (not used by this method)
            RarFileName: Path and name of the archive to extract
            destPath: Directory that receives the extracted files
        """
        extraction_options = {"outdir": destPath}
        patoolib.extract_archive(RarFileName, **extraction_options)
Beispiel #31
0
    def extract(self, src, dest):
        """Extract archive ``src`` into directory ``dest``.

        The destination is created first because patool complains otherwise;
        any OSError from creating it (e.g. it already exists) is ignored.
        """
        def _ensure_destination():
            try:
                os.mkdir(dest)
            except OSError:
                pass

        _ensure_destination()
        extract_archive(src, outdir=dest)
Beispiel #32
0
def extractFiles(indir="/Users/Pratik/Documents/Pratik/Work/practice/py-data-analysis", out="/Users/Pratik/Documents/Pratik/Work/practice/py-data-analysis/extracted"):
    """Extract every ``*.gz`` archive found in ``indir`` into ``out``.

    An archive is skipped when ``out`` already contains an entry whose name
    equals the archive name without its last three characters (``.gz``).

    Args:
        indir: Directory to scan. The process working directory is changed to
            it and not restored.
        out: Directory that receives the extracted files; created if missing.
    """
    os.chdir(indir)
    archives = glob.glob("*.gz")
    if not os.path.exists(out):
        os.makedirs(out)                 # os.mkdirs does not exist
    # Look in the real output directory rather than a hard-coded "extracted".
    already_extracted = set(os.listdir(out))
    for archive in archives:
        if archive[:-3] not in already_extracted:
            patoolib.extract_archive(archive, outdir=out)
Beispiel #33
0
 def scan_archives(self):
     """Extract and then delete every compressed file in the unsorted folder.

     Each file from ``utils.get_files(self.unsorted_path)`` that
     ``utils.is_compressed`` accepts is extracted in place; an info message
     and the result of deleting the archive are passed to
     ``self.process_response``.
     """
     for file_path in utils.get_files(self.unsorted_path):
         if not utils.is_compressed(file_path, self.extensions):
             continue
         patoolib.extract_archive(
             file_path, outdir=self.unsorted_path, verbosity=-1
         )
         self.process_response(['info', f'Extracting {file_path}'])
         self.process_response(utils.delete_file(file_path))
Beispiel #34
0
 def download():
     """
     Download and unpack the Tourism dataset unless it is already present.

     Does nothing but log a message when DATASET_PATH already exists as a
     directory.
     """
     # NOTE(review): the inner `download(...)` call presumably resolves to a
     # module-level helper of the same name, not to this function - confirm.
     if not os.path.isdir(DATASET_PATH):
         download(DATASET_URL, DATASET_FILE_PATH)
         patoolib.extract_archive(DATASET_FILE_PATH, outdir=DATASET_PATH)
         return
     logging.info(f'skip: {DATASET_PATH} directory already exists.')
Beispiel #35
0
    def _load_data(self, rdir, infos, filestring):
        """Download, unpack and stack time series into ``self.X``.

        For every entry of ``infos`` the rar named ``info[0]`` is downloaded
        from ``info[3]`` into ``rdir/raw`` (unless already there) and
        extracted into ``rdir`` (unless ``rdir/info[0]`` exists). Each
        extracted file whose name contains ``filestring`` is loaded with
        ``loadmat`` and the first 250000 samples of its series are appended
        as a new row of ``self.X``.
        """
        self.X = np.zeros((0, 250000))

        for info in infos:
            dataset_name = info[0]

            # raw rar files live in their own sub-folder
            rawdir = os.path.join(rdir, 'raw')
            self._mkdir(rawdir)

            # download only when the archive is not on disk yet
            fpath = os.path.join(rawdir, dataset_name + '.rar')
            if not os.path.exists(fpath):
                print("no dir/file")
                self._download(fpath, info[3].rstrip('\n'))

            cmpfile = rawdir + '/' + dataset_name + '.rar'
            print("file to exrtract is is::")
            print(cmpfile)

            # extracted content ends up in rdir/<dataset_name>
            ddir = rdir + '/' + dataset_name
            if os.path.exists(ddir):
                print("file already extracted, skipping unrar")
            else:
                pa.extract_archive(cmpfile, outdir=rdir, program=rarpath)

            # keep only the files selected by the search string
            flistsorted = [name for name in os.listdir(ddir) if filestring in name]

            print("sorted filelist:")
            print(flistsorted)

            for f in flistsorted:
                mat_dict = loadmat(ddir + '/' + f)

                # the data set is stored under a key containing the search string
                key = [k for k in mat_dict.keys() if filestring in k]
                time_series = mat_dict[key[0]]['Y'][0, 0][0, 6][2][:][0]

                self.X = np.vstack((self.X, time_series[0:250000]))
Beispiel #36
0
def extract() -> None:
    """Extract ``ttsdk.7z`` from ``cd`` into a freshly created ``ttsdk`` folder.

    An existing ``ttsdk`` folder is deleted and recreated first.
    """
    sdk_dir = os.path.join(cd, "ttsdk")
    try:
        os.mkdir(sdk_dir)
    except FileExistsError:
        # start from an empty folder
        shutil.rmtree(sdk_dir)
        os.mkdir(sdk_dir)
    sdk_archive = os.path.join(cd, "ttsdk.7z")
    patoolib.extract_archive(sdk_archive, outdir=sdk_dir)
Beispiel #37
0
 def scan_archives(self):
     """Extract and then delete every compressed file in the unsorted folder.

     Each file from ``utils.get_files(self.unsorted_path)`` that
     ``utils.is_compressed`` accepts is extracted in place; an info message
     and the result of deleting the archive are passed to
     ``self.process_response``.
     """
     for file_path in utils.get_files(self.unsorted_path):
         if not utils.is_compressed(file_path, self.extensions):
             continue
         patoolib.extract_archive(file_path,
                                  outdir=self.unsorted_path,
                                  verbosity=-1)
         self.process_response(['info', f'Extracting {file_path}'])
         self.process_response(utils.delete_file(file_path))
Beispiel #38
0
    def download_delta(self):
        """Fetch and load every FIAS delta newer than the current version.

        The function, in order:
            1) asks the FIAS download service for the list of deltas,
            2) downloads the archive of each newer delta,
            3) unpacks the archive to get the XML files of the tables,
            4) calls the loading function for each model file.
        """

        # I. Request the list of available deltas.
        # Service description:
        # https://fias.nalog.ru/WebServices/Public/DownloadService.asmx?op=GetLastDownloadFileInfo

        envelope = """<?xml version="1.0" encoding="utf-8"?>
        <soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
          <soap:Body>
            <GetAllDownloadFileInfo xmlns="https://fias.nalog.ru/WebServices/Public/DownloadService.asmx" />
          </soap:Body>
        </soap:Envelope>""".encode('utf-8')

        headers = {
            'Host': 'fias.nalog.ru',
            'Content-Type': 'text/xml; charset=utf-8',
            'Content-Length': str(len(envelope)),
        }

        response = requests.post(
            url='https://fias.nalog.ru/WebServices/Public/DownloadService.asmx',
            headers=headers,
            data=envelope)

        tree = ElementTree.fromstring(response.content)
        ns = {'ns': 'https://fias.nalog.ru/WebServices/Public/DownloadService.asmx'}

        # II. Last delta version that has already been loaded.
        current_version = 574  # TODO: read current_version from the database

        # III. Walk through every entry of the delta list.
        for el in tree.findall('.//ns:DownloadFileInfo', ns):

            version = int(el.find('.//ns:VersionId', ns).text)
            delta = el.find('.//ns:FiasDeltaXmlUrl', ns).text
            file_name = 'fias_delta_xml.rar'

            # only deltas newer than the loaded version are processed
            if version <= int(current_version):
                continue

            current_version = str(version)  # TODO: store current_version in the database

            # 1) Download and save the archive with the XML files
            response = requests.get(delta)
            version_dir = os.path.join(settings.FIAS_DIR, current_version)
            rar = os.path.join(version_dir, file_name)

            os.makedirs(os.path.dirname(rar), exist_ok=True)
            with open(rar, 'wb') as f:
                f.write(response.content)

            # 2) Unpack the archive
            patoolib.extract_archive(rar, outdir=version_dir, interactive=False)

            # 3) Call the loading function for each of the files
            for file in self.models.keys():
                self.load_to_db(file=file, delta=current_version)
 def unzip(self, base_dir):
     """Extract every archive found below ``base_dir`` next to itself.

     Files whose name ends with ``self.extensions`` are extracted with
     ``Archive(...).extractall``; files ending in ``rar`` are printed and
     extracted with ``extract_archive``. Output goes into the directory that
     contains the archive.
     """
     for root, dirs, files in os.walk(base_dir):
         for i in files:
             archive_path = os.path.join(root, i)
             if i.endswith(self.extensions):
                 Archive(archive_path).extractall(root)
             elif i.endswith('rar'):
                 # print() call form is valid on both Python 2 and Python 3;
                 # the original `print i` statement does not compile on 3.
                 print(i)
                 extract_archive(archive_path, outdir=root)
Beispiel #40
0
def unrar_files(root, files):
    """Extract the ``.rar`` entries of ``files`` into ``root``.

    Entries are handled in order.  Processing stops right after the first
    entry whose name contains ``.part`` (whether or not it was a rar file)
    -- presumably because a multi-volume set is unpacked as a whole from
    its first volume; confirm against the caller.

    :param root: directory that holds the files and receives the output.
    :param files: iterable of file names relative to ``root``.
    """
    for entry in files:
        extension = os.path.splitext(entry)[1]
        if extension == ".rar":
            print("rar file: ", entry)
            archive_path = os.path.join(root, entry)
            print ("to extract: ", archive_path)
            patoolib.extract_archive(
                archive_path,
                outdir=root,
                program=rar_program,
                interactive=False
            )
        if ".part" in entry:
            break
Beispiel #41
0
def extractFiles(indir="C:\\dest",out="C:\\dest\\Extracted"):
    """Extract the ``*.gz`` archives of ``indir`` that are not yet in ``out``.

    The process working directory is changed to ``indir`` as a side effect.
    An archive is skipped when ``out`` already contains an entry named like
    the archive without its last three characters (the ``.gz`` suffix).

    :param indir: directory that is searched for ``*.gz`` files.
    :param out: directory receiving the extracted files; created if missing.
    """
    os.chdir(indir)
    archives = glob.glob("*.gz")
    print (archives)
    if not os.path.exists(out):
        os.makedirs(out)
    # Look at the real output directory.  The previous hard-coded relative
    # "Extracted" only matched the default arguments and failed (or looked
    # at the wrong folder) for any other ``out``.
    files = os.listdir(out)
    print(files)
    for archive in archives:
        if archive[:-3] not in files:
            patoolib.extract_archive(archive, outdir=out)
Beispiel #42
0
def unpackRAR(date):
    """Unpack ``<date>.rar`` and keep only the daily index CSV it contains.

    The archive is extracted into the current directory, the file
    ``ProcessFile/Stk_Day/Stk_Day_Idx_Daily/<date>.csv`` is moved to
    ``./newdata/``, and finally both the extracted ``ProcessFile`` tree and
    the archive itself are deleted.

    :param date: string used as the base name of the archive and the CSV.
    """
    archive_name = date + '.rar'
    patoolib.extract_archive(archive_name, outdir=".")

    # Move the extracted file into the 'newdata' directory.
    extracted_csv = "./ProcessFile/Stk_Day/Stk_Day_Idx_Daily/%s.csv" % date
    shutil.move(extracted_csv, "./newdata/")

    # Delete the extracted ProcessFile tree and the rar archive.
    shutil.rmtree("./ProcessFile")
    os.remove(archive_name)
Beispiel #43
0
	def unpackTo(self, outdir):
		"""Extract ``self.filename`` into ``outdir`` unless that was done before.

		A directory that already contains at least one file, at any depth,
		is taken as proof of an earlier successful extraction and nothing is
		done.  Otherwise ``outdir`` is (re)created empty and the archive is
		extracted; if extraction raises, ``outdir`` is removed again and the
		exception propagates.
		"""
		already_populated = os.path.isdir(outdir) and any(
			names for _root, _subdirs, names in os.walk(outdir)
		)
		if already_populated:
			return

		# start from an existing but empty output directory
		createDirectory(outdir, deleteBefore=True)
		try:
			patoolib.extract_archive(self.filename, outdir=outdir)
		except:
			# never leave a half-extracted directory behind, it would be
			# mistaken for a finished extraction on the next call
			shutil.rmtree(outdir)
			raise
Beispiel #44
0
def unzip():
    """Unpack an uploaded archive, import its ENBCFG XML files and list all uploads.

    The archive name comes from the ``o_id`` query argument.  The archive is
    expected at ``<UPLOAD>/<stem>/<filename>`` where ``stem`` is the part of
    the name before the first dot.  Every ``ENBCFG.XML.gz`` found below
    ``<UPLOAD>/<stem>/<stem>`` is gunzipped in place and handed to
    ``xmlImport.xml_import``.  Afterwards the matching ``ManagerInfo``
    record is updated and the ``upload.html`` template is rendered with all
    records sorted by ``_id``.

    NOTE(review): ``o_id`` is used unchecked to build file system paths --
    confirm it is validated upstream.
    """
    db = conn()
    filename = request.args.get('o_id')
    stem = filename.split('.')[0]
    parts_dir = os.path.join(current_app.config['UPLOAD'], stem)
    patoolib.extract_archive(os.path.join(parts_dir, filename), outdir=parts_dir)

    for root, _dirs, files in os.walk(os.path.join(parts_dir, stem)):
        for name in files:
            if name != 'ENBCFG.XML.gz':
                continue
            patoolib.extract_archive(os.path.join(root, name), outdir=root)
            xmlImport.xml_import(db, os.path.join(root, 'ENBCFG.XML'), stem)

    db['ManagerInfo'].update({'_id':filename},{'$set':{'operator':'未分析','status':'已导入'}})
    records = db['ManagerInfo'].find().sort('_id')
    return render_template('upload.html', data=records)
Beispiel #45
0
def convert(min_length, extract = True, delete_tmp = False, zip_after = False):
    """Convert HTML files, optionally unpacking the yearly rar archives first.

    Works inside ``main_dir`` (the working directory is changed to it and,
    at the end, to its parent).

    :param min_length: passed through to ``convert_all_html_files``.
    :param extract: when true, every regular file in ``main_dir`` whose name
        contains ``.rar`` is extracted into a freshly emptied ``tmp_dir``
        and converted; when false, a single conversion pass is run.
    :param delete_tmp: when true, ``main_dir`` is removed afterwards (and
        ``results_dir`` too if the results were packed successfully).
    :param zip_after: when true, ``results_dir`` is packed into the gzipped
        tarball ``file_zip_after``.
    """
    none_type_counter = 0
    files_counter = 0

    os.chdir(main_dir)

    if extract:
        for entry in os.listdir(os.curdir):
            if '.rar' not in entry or not os.path.isfile(entry):
                continue

            # every archive gets a brand new, empty tmp_dir
            if os.path.isdir(tmp_dir):
                shutil.rmtree(tmp_dir)
            os.makedirs(tmp_dir)

            patoolib.extract_archive(entry, outdir = tmp_dir)

            batch_none, batch_files = convert_all_html_files(min_length)
            none_type_counter += batch_none
            files_counter += batch_files
    else:
        none_type_counter, files_counter = convert_all_html_files(min_length)

    logging.info('Zakonczono konwertowanie')
    logging.info('Liczba przekonwertowanych plikow {0}'.format(files_counter))
    logging.info('Liczba plikow konczacych sie na NoneType, zamiast <META> to: {0}'.format(none_type_counter))

    os.chdir('../')

    if zip_after:
        try:
            with tarfile.open(file_zip_after, "w:gz") as tar:
                tar.add(results_dir, arcname = os.path.basename(results_dir))
            logging.info('Spakowano efekty do {0}'.format(file_zip_after))
        except:
            logging.error('Wystapil blad przy pakowaniu wynikow pracy')
            # packing failed: make sure the unpacked results are kept below
            zip_after = False

    if not delete_tmp:
        return

    try:
        shutil.rmtree(main_dir)
        if zip_after:
            shutil.rmtree(results_dir)
        logging.info('Skasowane pliki tymczasowe')
    except:
        logging.error('Wystapil blad przy kasowaniu tymczasowych plikow')
Beispiel #46
0
def unpack(path):
    """Unpack the first archive found below ``path`` and delete it.

    The tree is walked top-down.  As soon as a regular file whose name ends
    with ``gz``, ``tar``, ``zip`` or ``rar`` is met, it is unpacked next to
    itself, the archive is removed and ``False`` is returned.  The caller is
    presumably expected to call again until ``True`` comes back, so that
    archives produced by a previous step are picked up as well.

    Progress is reported with ``print``; the calls are written so that they
    work on both Python 2 and Python 3.

    NOTE(review): ``extractall`` is applied to the archives as they are --
    do not run this on untrusted archives without checking member paths.

    :param path: root directory to scan.
    :returns: ``False`` if an archive was unpacked during this call,
        ``True`` if the walk finished without finding one.
    """
    for subdir, dirs, files in os.walk(path):
        print('subdir[' + subdir + ']')
        print('dirs[' + ",".join(dirs) + ']')
        print('files[' + ",".join(files) + ']')
        for name in files:
            f = os.path.join(subdir, name)
            print('file[' + f + ']')
            if not os.path.isfile(f):
                # Recurse with the full path; the bare name used before was
                # resolved against the current working directory instead.
                unpack(f)
                continue

            print('file[' + f + ']')
            if name.endswith('gz'):
                fo = f[:-3]
                # both handles are closed by the with statement
                with gzip.open(f, 'rb') as f_in, open(fo, 'wb') as f_out:
                    shutil.copyfileobj(f_in, f_out)
                os.remove(f)
                print('gunzip ' + fo)
                return False
            elif name.endswith('tar'):
                with tarfile.open(f, 'r') as tfile:
                    tfile.extractall(subdir)
                os.remove(f)
                print('untar ' + f)
                return False
            elif name.endswith('zip'):
                with zipfile.ZipFile(f) as zip_ref:
                    zip_ref.extractall(subdir)
                os.remove(f)
                print('unzip ' + f)
                return False
            elif name.endswith('rar'):
                patoolib.extract_archive(f, outdir=subdir)
                os.remove(f)
                print('unrar ' + f)
                return False

    print('unpack done!!')
    return True
Beispiel #47
0
    def __extract(self):
        """Extract the archive addressed by ``self.data['target']`` into a sibling folder.

        The folder is named after the archive file name up to its first dot
        and is created inside the archive's parent (``phash``) through the
        volume API before patool extracts into it.  The tree nodes of the
        parent whose path equals that folder are reported under ``"added"``
        in ``self.response``.
        """
        target = self.data['target']
        volume = self.get_volume(target)
        info = volume.get_info(target)
        archive_path = volume._find_path(target)
        parent_hash = info.get('phash')

        # last path component, cut at the first dot
        base_name = archive_path.split('/')[-1].split('.')[0]
        destination = os.path.join(volume._find_path(parent_hash), base_name)

        self.get_volume(parent_hash).mkdir(base_name, parent_hash)
        patoolib.extract_archive(archive_path, outdir=destination, interactive=False)

        added = [
            node
            for node in volume.get_tree(parent_hash)
            if volume._find_path(node['hash']) == destination
        ]
        self.response.update({"added": added})
def extractFiles(inDir, outDir):
    """Extract the ``*.gz`` archives of ``inDir`` that ``outDir`` lacks.

    Changes the working directory to ``inDir``, creates ``outDir`` when it
    does not exist, and extracts each archive whose name without the last
    three characters is not already listed in ``outDir``.
    """
    os.chdir(inDir)

    gz_archives = glob.glob("*.gz")

    if not os.path.exists(outDir):
        os.makedirs(outDir)

    # snapshot of what has been extracted so far
    extracted = os.listdir(outDir)

    pending = [name for name in gz_archives if name[:-3] not in extracted]
    for name in pending:
        patoolib.extract_archive(name, outdir=outDir)
Beispiel #49
0
def extract_nested(folder):
    """
    Unpack, in place, every supported archive found below ``folder``.

    Each archive is extracted into the directory that contains it and is
    deleted afterwards.  An archive patool cannot extract is reported on
    stderr and left where it is.
    """
    import patoolib
    archive_suffixes = ('.zip', '.rar', '.tar.gz', '.tgz', '.tar.bz2',
                        '.tar.xz', '.7z', '.tar')
    for root, dirs, files in os.walk(folder):
        for name in files:
            if not name.endswith(archive_suffixes):
                continue
            archive = os.path.join(root, name)
            try:
                vprint('Extracting archive: "%s"' % archive)
                patoolib.extract_archive(archive, verbosity=-1,
                                         interactive=False, outdir=root)
                os.remove(archive)
            except patoolib.util.PatoolError as e:
                print(e, file=sys.stderr)
                print('Failed to extract "%s"' % archive, file=sys.stderr)
Beispiel #50
0
def run(unpack=True):
    """Download every URL in ``const.SOURCE_URLS_18_4`` and optionally unpack it.

    Files are stored in the directory named by the ``CORINE_DATA_DIRECTORY``
    environment variable; when it is unset or empty a hint is printed and
    nothing is downloaded.

    :param unpack: extract each downloaded file into the data directory.
    """
    # Get data directory from environment
    datadir = os.environ.get('CORINE_DATA_DIRECTORY', '')
    if not datadir:
        print('Datadir not found, please specify CORINE_DATA_DIRECTORY env var.')
        return

    for url in const.SOURCE_URLS_18_4:
        filepath = os.path.join(datadir, os.path.basename(url))
        print('Downloading file', url, filepath)

        # stream the body to disk in 1 KiB chunks, skipping empty chunks
        response = requests.get(url, stream=True)
        with open(filepath, "wb") as handle:
            for chunk in response.iter_content(chunk_size=1024):
                if not chunk:
                    continue
                handle.write(chunk)

        if not unpack:
            continue
        print('Unpacking file', filepath)
        patoolib.extract_archive(filepath, outdir=datadir)
def archive_extractor(input_dir, output_dir):
    """Extract zip/rar files found under ``input_dir`` and recurse into the result.

    The working directory is changed to ``input_dir``.  Every file name seen
    while walking ``input_dir`` is printed; names ending in ``zip`` or
    ``rar`` are extracted into ``output_dir``.  After a successful
    extraction the first sub-directory of the current directory is
    processed recursively, with ``<sub_dir>\\output`` as its output
    directory (paths are built with backslashes).  On a ``PatoolError`` the
    error is printed and the working directory moves one level up.
    """
    os.chdir(input_dir)
    for _dirname, _dirnames, filenames in os.walk(input_dir):
        for filename in filenames:
            print(filename)
            if not filename.endswith(('zip', 'rar')):
                continue
            try:
                patoolib.extract_archive(filename, outdir=output_dir)
                print(filename, 'extracted')
                first_sub_dir = next(os.walk('.'))[1][0]
                sub_dir = '\\'.join((input_dir, first_sub_dir))
                print(sub_dir)
                new_output = sub_dir + '\\output'
                if not os.path.exists(new_output):
                    os.makedirs(new_output)

                archive_extractor(sub_dir, new_output)
            except PatoolError as e:
                print(e)
                os.chdir('..')
 def __unzip_to_dir(self,zippedfile,dest_directory):
     """Extract ``zippedfile`` into ``dest_directory``, as zip first, then as rar.

     The file is first opened with ``zipfile``.  If that fails and the file
     name ends in ``.rar``, extraction is retried through patool.

     :param zippedfile: path of the archive.
     :param dest_directory: directory receiving the content; created if
         necessary.
     :returns: ``True`` if one of the extractions succeeded, ``False``
         otherwise -- including the case where unzipping failed and the
         file is not a ``.rar`` (this used to be reported as success).
     """
     try:
         # unzip into directory; the with statement closes the archive
         # handle even when extraction fails
         with zipfile.ZipFile(zippedfile) as archive:
             archive.extractall(dest_directory)
         self.logger.info("Extracting files to:"+dest_directory)
         return True
     except Exception:
         self.logger.debug("Error Unzipping zip:" + zippedfile + "\ntrying rar:", exc_info=True)

     if os.path.splitext(zippedfile)[1] != '.rar':
         # neither a readable zip nor a rar: nothing left to try
         return False

     try:
         # OK lets try .rar extractor; first create directory
         if not os.path.exists(dest_directory):
             os.makedirs(dest_directory)
             self.logger.info("   Unzip failed trying .rar file to:"+dest_directory)
         # then extract
         patoolib.extract_archive(zippedfile,verbosity=1, outdir=dest_directory)
     except Exception:
         self.logger.debug("Error cannot extract rar file", exc_info=True)
         return False
     return True
Beispiel #53
0
 def _archive_extract(self, archive, check, verbosity=0):
     """Extract ``archive`` with ``self.program`` inside a throw-away directory.

     A temporary directory is created below ``basedir`` and made the working
     directory for the extraction.  When ``check`` is truthy the extraction
     result is passed to ``self.check_extracted_archive``.  The previous
     working directory is restored and the temporary directory removed in
     every case.
     """
     scratch_dir = patoolib.util.tmpdir(dir=basedir)
     try:
         previous_cwd = patoolib.util.chdir(scratch_dir)
         try:
             extracted = patoolib.extract_archive(
                 archive, program=self.program, verbosity=verbosity)
             if check:
                 self.check_extracted_archive(archive, extracted, check)
         finally:
             # only switch back if chdir() reported a previous directory
             if previous_cwd:
                 os.chdir(previous_cwd)
     finally:
         shutil.rmtree(scratch_dir)
def ftpDownloader(Id,startID,endID,url="<url here>",user="******",passwd="<password here>"):
    """Download a numbered range of files over FTP and unpack the archives.

    Logs in to ``url``, changes the local working directory to ``pathname``
    (created if missing) and fetches one file per number in
    ``startID..endID`` (inclusive).  Files ending in ``.gz``, ``.zip`` or
    ``.tar`` are extracted into ``unpack``.  A file the server refuses
    (``error_perm``) is reported and its empty local copy is removed.

    The remote path template and the FTP directory are placeholders that
    have to be filled in before use.

    :param Id: value substituted into the remote path template.
    :param startID: first number of the range.
    :param endID: last number of the range (inclusive).
    :param url: FTP host.
    :param user: FTP user name.
    :param passwd: FTP password.
    """
    ftp=FTP(url)
    try:
        ftp.login(user,passwd)
        if not os.path.exists(pathname):
            os.makedirs(pathname)
        print(ftp.nlst())

        ftp.cwd("<ftp working durectory here>")
        os.chdir(pathname)

        for array in range(startID, endID+1):
            #Enter full path below, including start and stop IDs
            fullpath='<insert ftp path here>' % (array,Id,array)
            filename=os.path.basename(fullpath)
            try:
                with open(filename,'wb') as handle:
                    ftp.retrbinary('RETR %s' % fullpath, handle.write)
            except error_perm:
                print("%s is not available" % filename)
                os.remove(filename)
                continue

            print("%s downloaded" % filename)
            # Extract only after the with block has closed the file:
            # before that, buffered data may not have reached the disk and
            # the extractor could see a truncated archive.
            if filename.endswith((".gz", ".zip", ".tar")):
                patoolib.extract_archive(filename,outdir="unpack")
    finally:
        # release the connection even when a download or extraction fails
        ftp.close()
def archive2dir(archive, remove_dir_structure, out_dir):
    """Extract ``archive`` into ``out_dir``, optionally flattening its layout.

    :param archive: path of the archive to extract.
    :param remove_dir_structure: when false, the archive is extracted
        straight into ``out_dir``.  When true, it is extracted into a
        temporary directory and every extracted file is copied into a new,
        uniquely named sub-directory of ``out_dir``; the target name of
        each file is decided by ``out_file_name``.
    :param out_dir: output directory.
    """
    if not remove_dir_structure:
        # extract archive directly into the output directory
        patoolib.extract_archive(archive, outdir=out_dir)
        return

    result_dir = os.path.join(out_dir, str(uuid.uuid4()))
    create_dirs(result_dir)

    # make temporary directory
    tempdir = tempfile.mkdtemp()
    try:
        # extract archive to temporary directory
        patoolib.extract_archive(archive, outdir=tempdir)

        # copy extracted files to output dir
        for f in get_files(tempdir, recursive=True):
            fo = out_file_name(result_dir, f)
            # don't copy if it's the same file
            if os.path.abspath(f) != fo:
                shutil.copy2(f, fo)
    finally:
        # remove temporary directory and its contents, also when the
        # extraction or a copy fails -- mkdtemp() never cleans up itself
        shutil.rmtree(tempdir)
Beispiel #56
0
 def check_created_archive_with_diff(self, archive, srcfiles):
     """Extract created archive again and compare the contents.

     Does nothing when no ``diff`` program is available.  The archive is
     extracted inside a temporary directory below ``basedir``, and each
     source file (looked up in ``datadir``) is compared with its extracted
     counterpart using ``diff -urN``.
     """
     diff = patoolib.util.find_program("diff")
     if not diff:
         return

     # programs that cannot extract what they create, mapped to one that can
     extractor_for = {
         'compress': 'gzip',
         'zip': 'unzip',
         'lcab': 'cabextract',
         'shar': 'unshar',
         'genisoimage': '7z',
     }
     program = extractor_for.get(self.program, self.program)

     scratch_dir = patoolib.util.tmpdir(dir=basedir)
     try:
         previous_cwd = patoolib.util.chdir(scratch_dir)
         try:
             output = patoolib.extract_archive(archive, program=program)
             if len(srcfiles) == 1:
                 # a single source is compared with the output itself
                 pairs = [(srcfiles[0], output)]
             else:
                 pairs = [(name, os.path.join(output, name)) for name in srcfiles]
             for name, target in pairs:
                 source = os.path.join(datadir, name)
                 patoolib.util.run_checked([diff, "-urN", source, target])
         finally:
             if previous_cwd:
                 os.chdir(previous_cwd)
     finally:
         shutil.rmtree(scratch_dir)
Beispiel #57
0
# Known archive file name suffixes, each including its leading dot.
archive_suffix = ['.rar', '.zip', '.7z', '.tar', '.gz', '.tgz', '.tar.gz', '.xz', '.bz2']


def _strip_archive_suffix(name):
	"""Return ``name`` without its trailing archive suffixes (``a.tar.gz`` -> ``a``)."""
	stripped = True
	while stripped:
		stripped = False
		for suffix in archive_suffix:
			# only cut at the very end, and never reduce the name to nothing
			if name.endswith(suffix) and len(name) > len(suffix):
				name = name[:-len(suffix)]
				stripped = True
	return name


def unpack_file(zipfile, outpath='.'):
	"""Extract the archive ``zipfile`` below ``outpath``.

	The archive is first extracted into the scratch directory
	``<outpath>/<TMP_DIR>``.  If it yields exactly one top-level entry, that
	entry is moved into ``outpath``; if it yields several, the scratch
	directory is renamed after the archive (file name without its archive
	suffixes).  The scratch directory is removed when extraction fails or
	the archive is empty, so that a later call can create it again.

	:param zipfile: path of the archive (any format patool supports).
	:param outpath: destination directory; created if missing.
	:returns: ``-1`` when the directories cannot be created or extraction
		fails, ``None`` otherwise.
	"""
	tmpdir = os.path.join(outpath, TMP_DIR)
	try:
		if not os.path.exists(outpath):
			os.mkdir(outpath)
		os.mkdir(tmpdir)
	except OSError as e:
		print("mkdir %s failed: %s" % (tmpdir, str(e)))
		return -1

	try:
		patoolib.extract_archive(zipfile, outdir=tmpdir)
	except patoolib.PatoolError:
		# TODO: mark zipfile in RED
		print("unpack file %s failed!" % zipfile)
		shutil.rmtree(tmpdir, ignore_errors=True)
		return -1

	all_files = os.listdir(tmpdir)
	if len(all_files) == 1:
		shutil.move(os.path.join(tmpdir, all_files[0]), outpath)
		os.rmdir(tmpdir)
	elif len(all_files) > 1:
		barename = _strip_archive_suffix(os.path.basename(zipfile))
		os.rename(tmpdir, os.path.join(outpath, barename))
	else:
		# empty archive: do not leave the scratch directory behind
		os.rmdir(tmpdir)
Beispiel #58
0
 def __extractor__extract_rar(self, dst_dir_path):
     """Extract the archive at ``self.archive_path`` into ``dst_dir_path`` via patool."""
     patoolib.extract_archive(self.archive_path, outdir=dst_dir_path)
Beispiel #59
0
def extract_archive(filename, directory):
    """Extract the archive ``filename`` into ``directory`` using patool.

    Thin wrapper that forwards to ``patoolib.extract_archive`` with
    ``directory`` as ``outdir``; the result of that call is not returned.
    """
    patoolib.extract_archive(filename, outdir=directory)