def create_user_directories():
    count_failed, count_java = 0, 0
    unpack_failed_users = []
    for filename in os.listdir(ASSIGNMENT_DIRECTORY_PATH):
        file_path = os.path.join(ASSIGNMENT_DIRECTORY_PATH, filename)
        if os.path.isdir(file_path) or filename.endswith('.txt'):
            continue
        user_id = extract_userid_from_filename(filename)
        if user_id is None:
            continue
        user_dir_path = os.path.join(ASSIGNMENT_DIRECTORY_PATH, user_id)
        if not os.path.exists(user_dir_path):
            os.makedirs(user_dir_path)
        if len(os.listdir(user_dir_path)) > 0:
            continue
        # Try extracting files to the corresponding user directory
        try:
            pyunpack.Archive(file_path).extractall(user_dir_path)
        except Exception as e:
            if filename.endswith(SUBMISSION_FILE_NAME):
                shutil.copy(file_path, os.path.join(user_dir_path, SUBMISSION_FILE_NAME))
                append_to_file(DID_NOT_FOLLOW_GUIDELINES_FILE, user_id)
                # print("Copied java file for user: %s" % user_id)
                continue
            print(e)
            count_failed += 1
            unpack_failed_users.append(user_id)
    print("Total failed to unpack count: %d" % count_failed)
    print("Unpacking failed for users: ", unpack_failed_users)
def extract_all(self, path):
    """
    Extract recursively all archives under the path directory and copy
    all relevant files to the path directory

    Args:
        path (str): Path to the directory
    """
    for root, dirs, files in os.walk(path):
        for f in files:
            f_path = os.path.join(root, f)
            if (not os.path.islink(f_path) and self._is_archive(f_path)
                    and os.path.getsize(f_path) != 0):
                dst_path = os.path.splitext(f_path)[0]
                print('==== Unpack the file %s ====' % f_path)
                pyunpack.Archive(f_path).extractall(dst_path, auto_create_dir=True)
                f_path = dst_path
            if os.path.isdir(f_path):
                self.extract_all(f_path)
            if os.path.isfile(f_path) and self._is_relevant_file(f_path):
                dst_path = os.path.join(self.dst, os.path.basename(f_path))
                if self._is_host_log(f_path):
                    dst_path = self._generate_host_log_name(f_path)
                if f_path != dst_path and not self._is_archive(f_path):
                    shutil.copy(f_path, dst_path)
        if dirs:
            for dir_name in dirs:
                self.extract_all(dir_name)
def unrar(filepath, rm_archive):
    unzip_folder = os.path.splitext(filepath)[0]
    try_makedirs(unzip_folder)
    pyunpack.Archive(filepath).extractall(unzip_folder)
    if rm_archive:
        os.remove(filepath)
    return unzip_folder
def _install_anime4kcpp(self):
    """ Install Anime4KCPP """
    print('\nInstalling Anime4KCPP')
    import pyunpack
    import requests

    # get latest release of Anime4KCPP via Github API
    # at the time of writing this portion, Anime4KCPP doesn't yet have a stable release
    # therefore releases/latest won't work
    latest_release = requests.get(
        'https://api.github.com/repos/TianZerL/Anime4KCPP/releases/latest'
    ).json()

    for a in latest_release['assets']:
        if re.search(r'Anime4KCPP_CLI-.*-Win64-msvc\.7z', a['browser_download_url']):
            anime4kcpp_zip = download(a['browser_download_url'], tempfile.gettempdir())
            self.trash.append(anime4kcpp_zip)

            # extract and rename
            # with py7zr.SevenZipFile(anime4kcpp_zip, mode='r') as archive:
            (LOCALAPPDATA / 'video2x' / 'anime4kcpp').mkdir(parents=True, exist_ok=True)
            pyunpack.Archive(anime4kcpp_zip).extractall(LOCALAPPDATA / 'video2x' / 'anime4kcpp')
def unrar(self, filepath):
    """must return the filepath of the extracted file"""
    potentials = glob.glob(os.path.join(filepath, "*r*"))
    arcpath = potentials[0]
    for p in potentials:
        ext = p.split(".")[-1]
        if ext == "rar":
            arcpath = p
    dir_contents = os.listdir(filepath)
    archive = pyunpack.Archive(arcpath)
    archive.extractall(filepath)
    new_contents = os.listdir(filepath)
    fn = None
    for c in new_contents:
        if c not in dir_contents:
            fn = c
    renameTo = "%s.%s" % (os.path.basename(filepath), fn.split(".")[-1])
    os.rename(os.path.join(filepath, fn), os.path.join(filepath, renameTo))
    # archive = UnRAR2.RarFile(arcpath)
    # fn = archive.infolist()[0].filename
    # renameTo = "%s.%s" % (os.path.basename(filepath), fn.split(".")[-1])
    # archive.extract([0], filepath)
    # os.rename(os.path.join(filepath, fn), os.path.join(filepath, renameTo))
    return os.path.join(filepath, renameTo)
def unzip(zip_path, output_file, data_folder):
    print("Unzipping file: {}".format(zip_path))
    pyunpack.Archive(zip_path).extractall(data_folder)
    if not os.path.exists(output_file):
        raise ValueError(
            "Error in unzipping process! {} not found.".format(output_file))
def extract_submission(arch_path, dest_dir):
    """raise InvalidSubmission if submission is malformed"""
    arc.Archive(arch_path, backend="patool").extractall(dest_dir)
    sources_dir = ""
    sources_list_file = ""
    for dirpath, dirnames, filenames in os.walk(dest_dir):
        if fs.basename(dirpath) == "__MACOSX":
            continue
        for dirname in [d for d in dirnames if not _skip_file(d)]:
            if dirname.lower().find("sources") != -1:
                sources_dir = fs.join(dirpath, dirname)
        for filename in [f for f in filenames if not _skip_file(f)]:
            if filename.lower().find("sources_list") != -1:
                sources_list_file = fs.join(dirpath, filename)
                break
    if not sources_dir:
        raise InvalidSubmission("Could not find the sources folder")
    if not sources_list_file:
        raise InvalidSubmission("Could not find the sources_list.xlsx file")
    return sources_dir, sources_list_file
def download():
    DOWNLOAD_URL = 'http://vision.stanford.edu/lijiali/event_dataset/event_dataset.rar'

    # make sport8 directory
    sport8 = utils.full_path(os.path.join(dataroot, 'sport8'))
    meta = utils.full_path(os.path.join(sport8, 'meta'))
    os.makedirs(sport8, exist_ok=True)
    os.makedirs(meta, exist_ok=True)

    dir_downloads = utils.dir_downloads()
    filename = os.path.basename(DOWNLOAD_URL)
    archive = os.path.join(dir_downloads, filename)
    if not os.path.isfile(archive):
        tvutils.download_url(DOWNLOAD_URL, dir_downloads, filename)

    print(f"Extracting {archive} to {sport8}")
    pyunpack.Archive(archive).extractall(sport8)

    # download the csv files for the train and test split
    # from 'NAS Evaluation is Frustrating' repo
    # note that download_url doesn't work in vscode debug mode
    test_file_url = 'https://raw.githubusercontent.com/antoyang/NAS-Benchmark/master/data/Sport8_test.csv'
    train_file_url = 'https://raw.githubusercontent.com/antoyang/NAS-Benchmark/master/data/Sport8_train.csv'
    tvutils.download_url(test_file_url, meta, filename=None, md5=None)
    tvutils.download_url(train_file_url, meta, filename=None, md5=None)

    return sport8, meta
def process_archive(archivepath, archivename, rootarchive=''):
    OK = False
    print("Processing", archivepath, archivename)
    try:
        pyunpack.Archive(os.path.join(archivepath, archivename)).extractall(
            os.path.join(tmpdir, archivename), auto_create_dir=True)
        process_folder(os.path.join(tmpdir, archivename),
                       rootarchive=rootarchive + archivename + ' [Z] ')
        OK = True
    except pyunpack.PatoolError:
        print("ERROR: PATOOL ERROR, UNABLE TO UNZIP", rootarchive, archivepath, archivename)
        OK = False
    except IOError:
        print("ERROR: IOERROR, UNABLE TO UNZIP", rootarchive, archivepath, archivename)
        OK = False
    except zipfile.BadZipfile:
        OK = False
        print("ERROR: zipfile.BadZipfile", rootarchive, archivepath, archivename)
    except ValueError:
        OK = False
        print("ERROR: ValueError, archive does not exist!", rootarchive, archivepath, archivename)
    try:
        shutil.rmtree('%s' % (os.path.join(tmpdir, archivename)))
    except WindowsError:
        print("WARNING: Unable to delete folder", os.path.join(tmpdir, archivename))
    return OK
def extractcars():
    arr = os.listdir(modcars)
    try:
        firstfile = arr[0]
    except IndexError:
        return
    pathtozip = modcars + firstfile
    if firstfile.endswith('.7z'):
        archive = py7zr.SevenZipFile(pathtozip, mode='r')
        archive.extractall(path=accars)
        archive.close()
        if os.path.exists(path=pathtozip):
            os.remove(path=pathtozip)
        else:
            print("The file doesn't exist")
    elif firstfile.endswith('.zip'):
        zf = ZipFile(pathtozip, 'r')
        zf.extractall(accars)
        zf.close()
        if os.path.exists(path=pathtozip):
            os.remove(path=pathtozip)
        else:
            print("The file doesn't exist")
    elif firstfile.endswith('.rar'):
        pyunpack.Archive(pathtozip).extractall(accars)
        if os.path.exists(path=pathtozip):
            os.remove(path=pathtozip)
        else:
            print("The file doesn't exist")
    else:
        print("Extract Car Function Error")
def _uncompress_general(self, filename, filehash):
    """ Uncompresses the file and saves to the destination directory """
    try:
        # destination_dir = os.path.join(os.path.dirname(filename), "Uncompressed", os.path.basename(filename.split(".")[0]))
        destination_dir = os.path.join(os.path.dirname(filename), "Uncompressed",
                                       os.path.basename(filename))
        if not os.path.isdir(destination_dir):
            os.makedirs(destination_dir)
        pyunpack.Archive(filename).extractall(destination_dir)
        with open(self.log_file, 'a', encoding=self.system_encoding) as log_file:
            log_file.write("[{} Success]{}".format(
                self._uncompress_general.__name__, filename))
            log_file.write('\n')
        return destination_dir
    except Exception as e:
        with open(self.log_file, 'a', encoding=self.system_encoding) as log_file:
            log_file.write("[{} Failed]{} --- {}".format(
                self._uncompress_general.__name__, filename, str(e)))
            log_file.write('\n')
        return False
def download_glove(data_dir=data_dir):
    source = "http://nlp.stanford.edu/data/glove.6B.zip"
    target = data_dir.joinpath("glove6B.zip")
    if not target.exists():
        downloader(source, target)
        pyunpack.Archive(target).extractall(data_dir)
def execute(self):
    self.output(
        'Extract %s to %s' % (self.__package.path(), self.__destination),
        'Extract %s' % self.__package.path())
    import pyunpack
    pyunpack.Archive(str(self.__package.path(absolute=True))).extractall(
        str(self.__destination))
    return True
def unzip(zip_path, output_file, data_folder):
    """Unzips files and checks successful completion."""
    print('Unzipping file: {}'.format(zip_path))
    pyunpack.Archive(zip_path).extractall(data_folder)
    if not os.path.exists(output_file):
        raise ValueError(
            'Error in unzipping process! {} not found.'.format(output_file))
def rar():
    directory = input('directory: ')
    FileOrDirectory = input('File or whole directory? f/d: ')
    if FileOrDirectory == 'd' or FileOrDirectory == 'D':
        for file in os.listdir(os.fsencode(directory)):
            filename = os.fsdecode(file)
            if filename.endswith(".rar"):
                pyunpack.Archive(directory + '\\' + filename).extractall(directory)
    elif FileOrDirectory == 'f' or FileOrDirectory == 'F':
        filename = input('filename: ')
        try:
            pyunpack.Archive(directory + '\\' + filename).extractall(directory)
        except Exception:
            print('error: file probably not .rar')
    else:
        return 'error: not proper response'
    print('decoding complete')
def extractSubFiles(folder_path):
    for act_file in os.listdir(folder_path):
        # Get folder name
        folder_name = act_file.split('.')[0]
        if '.rar' in act_file:
            print(f"File: {act_file} Folder: {folder_name}")
            pyunpack.Archive(os.path.join(folder_path, act_file)).extractall(
                os.path.join(folder_path))
            # Remove the .rar
            os.remove(os.path.join(folder_path, act_file))
def _extract(self):
    LOGGER.info(
        f'{self.__class__.__name__}: extracting (this may take a while)')
    archive = pyunpack.Archive(self.archive)
    patool = _find_patool()
    archive.extractall(directory=self.install_dir,
                       auto_create_dir=True,
                       patool_path=str(patool))
    LOGGER.debug(f'{self.__class__.__name__}: removing archive')
    # self.archive.unlink()
    LOGGER.info(f'{self.__class__.__name__}: successfully extracted')
def extract(self, archive: str) -> str:
    super().extract(archive)
    expanded = self.working_dir + '/' + os.path.basename(archive)
    if not os.path.exists(expanded):
        os.makedirs(expanded)
    zipped = pyunpack.Archive(archive)
    zipped.extractall(expanded, auto_create_dir=True)
    return expanded
def main():
    folder = tempfile.gettempdir()
    local_filename = urllib.request.urlretrieve(
        'http://www.hacker.org/challenge/misc/file.compressed')[0]
    zip_file = pyunpack.Archive(local_filename)
    folder += '/rar'
    zip_file.extractall(folder, auto_create_dir=True)
    rar_filename = get_regular_filename(folder)
    rar_file = pyunpack.Archive(os.path.join(folder, rar_filename))
    folder += '/arj'
    rar_file.extractall(folder, auto_create_dir=True)
    arj_filename = get_regular_filename(folder)
    arj_file = pyunpack.Archive(os.path.join(folder, arj_filename))
    folder += '/cab'
    arj_file.extractall(folder, auto_create_dir=True)
    cab_filename = get_regular_filename(folder)
    cab_file = pyunpack.Archive(os.path.join(folder, cab_filename))
    folder += '/hqx'
    cab_file.extractall(folder, auto_create_dir=True)
    hqx_filename = get_regular_filename(folder)
    hqx_path = os.path.join(folder, hqx_filename)
    folder += '/sitx'
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
    binhex.hexbin(hqx_path, None)
    sitx_filename = get_regular_filename(folder)
    sitx_path = os.path.join(folder, sitx_filename)
    folder += '/gz'
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
    subprocess.run(['unar', '-f', sitx_path], stdout=open(os.devnull, 'w'))
    gz_filename = get_regular_filename(folder)
    gz_file = pyunpack.Archive(os.path.join(folder, gz_filename))
    folder += '/bz2'
    gz_file.extractall(folder, auto_create_dir=True)
    bz2_filename = get_regular_filename(folder)
    bz2_file = pyunpack.Archive(os.path.join(folder, bz2_filename))
    folder += '/7z'
    bz2_file.extractall(folder, auto_create_dir=True)
    _7z_filename = get_regular_filename(folder)
    _7z_file = pyunpack.Archive(os.path.join(folder, _7z_filename))
    folder += '/txt'
    _7z_file.extractall(folder, auto_create_dir=True)
    txt_filename = get_regular_filename(folder)
    print(open(os.path.join(folder, txt_filename)).read())
def download(s: Session, homework: dict, directory: Path):
    data = '[{"name":"sEcho","value":1},{"name":"iColumns","value":12},{"name":"sColumns","value":",,,,,,,,,,,"},'\
        '{"name":"iDisplayStart","value":0},{"name":"iDisplayLength","value":"-1"},{"name":"mDataProp_0",'\
        '"value":"function"},{"name":"bSortable_0","value":false},{"name":"mDataProp_1","value":"qzmc"},'\
        '{"name":"bSortable_1","value":true},{"name":"mDataProp_2","value":"xh"},{"name":"bSortable_2",'\
        '"value":true},{"name":"mDataProp_3","value":"xm"},{"name":"bSortable_3","value":true},'\
        '{"name":"mDataProp_4","value":"dwmc"},{"name":"bSortable_4","value":false},'\
        '{"name":"mDataProp_5","value":"bm"},{"name":"bSortable_5","value":false},'\
        '{"name":"mDataProp_6","value":"xzsj"},{"name":"bSortable_6","value":true},'\
        '{"name":"mDataProp_7","value":"scsjStr"},{"name":"bSortable_7","value":false},'\
        '{"name":"mDataProp_8","value":"pyzt"},{"name":"bSortable_8","value":true},'\
        '{"name":"mDataProp_9","value":"cj"},{"name":"bSortable_9","value":true},'\
        '{"name":"mDataProp_10","value":"jsm"},{"name":"bSortable_10","value":true},'\
        '{"name":"mDataProp_11","value":"function"},{"name":"bSortable_11","value":false},'\
        '{"name":"iSortCol_0","value":2},{"name":"sSortDir_0","value":"asc"},{"name":"iSortingCols","value":1},'\
        '{"name":"zyid","value":"%s"},{"name":"wlkcid","value":"%s"}]' % (
            homework['zyid'], homework['wlkcid'])
    url = 'https://learn.tsinghua.edu.cn/b/wlxt/kczy/xszy/teacher/getDoneInfo'
    students = s.post(url, data={'aoData': data}).json()['object']['aaData']
    directory = directory / homework['bt']
    directory.mkdir(parents=True, exist_ok=True)
    for student in tqdm(students):
        base_url = 'https://learn.tsinghua.edu.cn/b/wlxt/kczy/xszy/teacher/downloadFile'
        if not student['zyfjid']:
            continue
        url = f'{base_url}/{homework["wlkcid"]}/{student["zyfjid"]}'
        headers = s.head(url).headers
        raw_filename = re.search('filename="(.*?)"',
                                 headers['Content-Disposition']).group(1)
        suffix = Path(raw_filename).suffix
        filename = f'{student["xh"]}-{student["xm"]}' + suffix
        path = directory / filename
        size = int(headers['Content-Length'])
        if path.is_file() and path.stat().st_size == size:
            sleep(0.01)
            continue
        response = s.get(url, stream=True)
        assert response.status_code == 200
        with open(path, 'wb') as file:
            for chunk in tqdm(response.iter_content(32768)):
                if chunk:
                    file.write(chunk)
        if path.suffix in ['.rar', '.zip', '.7z']:
            arch = pyunpack.Archive(path)
            extract_to = directory / path.stem
            extract_to.mkdir(exist_ok=True)
            try:
                arch.extractall(directory=extract_to)
            except Exception as e:
                print(type(e).mro()[0], *e.args)
                print('Failed to extract', path)
        else:
            print('Unrecognized archive file', path)
def unzip(zip_path, output_file, data_folder, use_z=False):
    """Unzips files and checks successful completion."""
    print("Unzipping file: {}".format(zip_path))
    if use_z:
        py7zr.SevenZipFile(zip_path, mode="r").extractall(path=data_folder)
    else:
        pyunpack.Archive(zip_path).extractall(data_folder)

    # Checks if unzip was successful
    if not os.path.exists(output_file):
        raise ValueError(
            "Error in unzipping process! {} not found.".format(output_file))
def get_page_info(file, page_size, page):
    """
    Computes paging information for the images in an archive.
    @param file The archive file
    @returns begin_page, end_page, total_pages
    """
    if page is None:
        page = 1
    if page_size is None:
        page_size = 9999
    begin_page = (page - 1) * page_size
    end_page = page * page_size
    total_pages = 0
    if zipfile.is_zipfile(file):
        zip_file = zipfile.ZipFile(file, 'r')
        file_list = zip_file.infolist()
        if end_page > len(file_list):
            end_page = len(file_list)
        for entry in file_list:
            if not entry.is_dir():
                total_pages = total_pages + 1
        zip_file.close()
    elif rarfile.is_rarfile(file):
        rar_file = rarfile.RarFile(file, 'r')
        file_list = rar_file.infolist()
        if end_page > len(file_list):
            end_page = len(file_list)
        for entry in file_list:
            if not entry.isdir():
                total_pages = total_pages + 1
        rar_file.close()
    else:
        try:
            tempdir = tempfile.mkdtemp()
            archive_file = pyunpack.Archive(file)
            archive_file.extractall(tempdir)
            file_list = []
            for root, dirs, files in walk(tempdir):
                for name in files:
                    file_list.append(root + "/" + name)
            file_list.sort()
            if end_page > len(file_list):
                end_page = len(file_list)
            total_pages = len(file_list)
        except Exception as e:
            print(e)
        finally:
            # delete temp files
            shutil.rmtree(tempdir)
    return begin_page, end_page, total_pages
def unpack(_file, path):
    paths = []
    ext = _file.split(".")
    e = ext[-1] if ext[-1] != "gz" and ext[-2] != "tar" else "tar.gz"
    file_name = _file.split("/")[-1]
    try:
        pyunpack.Archive(_file).extractall(path)
        for _dir, _dirs, _files in os.walk(path):
            if len(_dirs) == 0 and len(_files) > 0:
                for i in _files:
                    paths.append(os.path.join(_dir, i))
        os.unlink(_file)
        return (False, paths, e, file_name)
    except Exception:
        return (True, None, None, file_name)
def unpack_game(game, removezip=False):
    targetDir = config.GAMES / game.source / convertToFileName(
        game.name) + '.' + str(game.id)
    print('unpacking %s...' % game.name, end='')
    sys.stdout.flush()
    pyunpack.Archive(game.zip).extractall(targetDir, auto_create_dir=True)
    # if msg:
    #     return False
    print('OK')
    if removezip:
        os.remove(game.zip)
    game.dir = targetDir
    db.save_game(game)
    return True
def create(input_rgb=None, input_depth=None):
    if (input_rgb is None) != (input_depth is None):
        raise ValueError(
            "Either both or neither of input_rgb and input_depth should be given"
        )
    return_model = input_rgb is None
    if input_rgb is None:
        input_rgb = tf.keras.layers.Input((None, None, 3))
        input_depth = tf.keras.layers.Input((None, None, 1))

    # Create model
    x = esanet.esanet(input_rgb,
                      input_depth,
                      classes=40,
                      num_residual_units=[3, 4, 6, 3],
                      filters=[64, 128, 256, 512],
                      dilation_rates=[1, 1, 1, 1],
                      strides=[1, 2, 2, 2],
                      psp_bin_sizes=[1, 5],
                      config=config)
    x = tf.keras.layers.Softmax()(x)
    model = tf.keras.Model(inputs=[input_rgb, input_depth], outputs=[x])

    # Fix batchnorm epsilons
    for layer in model.layers:
        if layer.name.endswith("/1/norm") or layer.name.endswith("/2/norm"):
            layer.epsilon = 1e-3
    model = tf.keras.Model(inputs=model.inputs, outputs=[model.output])

    # https://github.com/TUI-NICR/ESANet
    download_file = os.path.join(os.path.expanduser("~"), ".keras",
                                 "nyuv2_r34_NBt1D_scenenet.tar.gz")
    gdd.download_file_from_google_drive(
        file_id="1w_Qa8AWUC6uHzQamwu-PAqA7P00hgl8w", dest_path=download_file)
    weights_uncompressed = os.path.join(os.path.dirname(download_file), "nyuv2",
                                        "r34_NBt1D_scenenet.pth")
    if not os.path.isfile(weights_uncompressed):
        pyunpack.Archive(download_file).extractall(
            os.path.dirname(download_file))
    tfcv.model.pretrained.weights.load_pth(
        weights_uncompressed,
        model,
        convert_name,
        ignore=lambda name: "side_output" in name)

    return model if return_model else x
def load_wiki_attacks(data_dir=data_dir):
    if not data_dir.exists():
        data_dir.mkdir(parents=True)
    if not data_dir.joinpath("text_data").exists():
        source = "https://activelearning.blob.core.windows.net/activelearningdemo/text_data.zip"
        target = str(data_dir / "text_data.zip")
        downloader(source, target)
        pyunpack.Archive(target).extractall(data_dir)
    toxic_df = pd.read_csv(str(data_dir / "text_data" / "attack_data.csv"),
                           encoding="ISO-8859-1")
    toxic_df["comment_text"] = toxic_df.comment.replace(
        r'NEWLINE_TOKEN|[^.,A-Za-z0-9]+', ' ', regex=True)
    return toxic_df
def fetch(self) -> bytes:
    try:
        response = requests.get(self.url)
    except requests.RequestException as e:
        colors.print_error("[!] " + str(e))
        return False

    # Get response
    data = response.content

    # Check if data is compressed
    if is_compressed(data):
        colors.print_info("[-] Decompressing %s" % self.url)

        # Write the response to a temporary file
        if not os.path.exists(temp_dir):
            os.mkdir(temp_dir)
        temp_filename = temp_dir + "tempfile"

        # Sadly we need to write it to a file because pyunpack can't yet
        # decompress from binary data directly from memory
        temp_file = open(temp_filename, "wb")
        temp_file.write(data)
        temp_file.close()

        # Decompress
        filename = temp_filename
        archive_dir = temp_dir + "archive/"
        if not os.path.exists(archive_dir):
            os.mkdir(archive_dir)

        # Sometimes it's compressed multiple times
        while True:
            arch = pyunpack.Archive(filename)
            arch.extractall(archive_dir)
            os.remove(filename)
            filename = archive_dir + os.listdir(archive_dir)[0]
            compressed = is_file_compressed(filename)
            if not compressed:
                break

        temp_file = open(filename, "rb")
        data = bytes(temp_file.read())
        temp_file.close()
        os.remove(filename)

    return data
def _extract_archive(self, archive_path, unzip_dir='.', archive_format='auto'):
    if archive_format is None:
        return False
    if archive_format == 'auto':
        archive_format = ['tar', 'zip', 'rar']
    if isinstance(archive_format, six.string_types):
        archive_format = [archive_format]

    is_match_fn = None
    open_fn = None
    for archive_type in archive_format:
        if archive_type == 'tar':
            open_fn = tarfile.open
            is_match_fn = tarfile.is_tarfile
        if archive_type == 'zip':
            open_fn = zipfile.ZipFile
            is_match_fn = zipfile.is_zipfile
        if archive_type == 'rar':
            archive = pyunpack.Archive(archive_path)
            archive.extractall('\\'.join(unzip_dir.split('\\')[:-1]))
            return True

        if is_match_fn(archive_path):
            with open_fn(archive_path) as archive:
                try:
                    archive.extractall(unzip_dir)
                except (tarfile.TarError, RuntimeError, KeyboardInterrupt):
                    if os.path.exists(unzip_dir):
                        if os.path.isfile(unzip_dir):
                            os.remove(unzip_dir)
                        else:
                            shutil.rmtree(unzip_dir)
                    raise
            return True
    return False
def create_x(input, dilate, resnet_v1_x, url):
    return_model = input is None
    if input is None:
        input = tf.keras.layers.Input((None, None, 3))
    x = input
    x = resnet_v1_x(x, dilate=dilate, stem="b", config=config)
    model = tf.keras.Model(inputs=[input], outputs=[x])

    weights_compressed = tf.keras.utils.get_file(url.split("/")[-1], url)
    weights_uncompressed = weights_compressed[:-len("_2016_08_28.tar.gz")] + ".ckpt"
    if not os.path.isfile(weights_uncompressed):
        pyunpack.Archive(weights_compressed).extractall(
            os.path.dirname(weights_compressed))
    tfcv.model.pretrained.weights.load_ckpt(weights_uncompressed, model, convert_name)

    return model if return_model else x
def extract_kaggle_archive_to_local_path(local_archive_path, local_fname):
    # Annoyingly we have to special case Kaggle files that have the string 'v2' in them.
    is_v2_archive = 'v2' in local_archive_path

    print("Extracting {} to {}...".format(local_archive_path, local_fname))
    archive = pyunpack.Archive(local_archive_path)
    archive.extractall(LOCAL_DATA_PATH)

    if is_v2_archive:
        # This is the directory to which the archive was extracted
        extract_dir = os.path.join(LOCAL_DATA_PATH, KAGGLE_V2_ARCHIVE_BASE_PATH)
        extracted_location = os.path.join(extract_dir, local_fname)
        desired_location = os.path.join(LOCAL_DATA_PATH, local_fname)
        os.rename(extracted_location, desired_location)
        print("Removing needless extracted directories...")
        os.rmdir(extract_dir)
        os.rmdir(os.path.join(LOCAL_DATA_PATH, 'data'))
        print("Done removing those needless directories!")

    print("Done with extraction, removing {}...".format(local_archive_path))
    os.remove(local_archive_path)
    print("All done extracting!")