def create_user_directories():
    """Create one directory per user under ASSIGNMENT_DIRECTORY_PATH and
    extract each submission archive into it.

    Archives that fail to unpack but end with SUBMISSION_FILE_NAME are
    copied verbatim (raw submission, not archived) and the user is logged
    to DID_NOT_FOLLOW_GUIDELINES_FILE. Prints a failure summary at the end.
    """
    count_failed = 0  # dropped unused count_java counter
    unpack_failed_users = []
    for filename in os.listdir(ASSIGNMENT_DIRECTORY_PATH):
        file_path = os.path.join(ASSIGNMENT_DIRECTORY_PATH, filename)
        # Skip already-created user directories and stray text files.
        if os.path.isdir(file_path) or filename.endswith('.txt'):
            continue
        user_id = extract_userid_from_filename(filename)
        if user_id is None:
            continue
        user_dir_path = os.path.join(ASSIGNMENT_DIRECTORY_PATH, user_id)
        # exist_ok avoids the check-then-create race of the old
        # os.path.exists() + makedirs() pair.
        os.makedirs(user_dir_path, exist_ok=True)
        # A non-empty directory means this submission was already extracted.
        if os.listdir(user_dir_path):
            continue
        # Try extracting files to the corresponding user directory
        try:
            pyunpack.Archive(file_path).extractall(user_dir_path)
        except Exception as e:
            # Submission uploaded without archiving: copy it as-is.
            if filename.endswith(SUBMISSION_FILE_NAME):
                shutil.copy(file_path,
                            os.path.join(user_dir_path, SUBMISSION_FILE_NAME))
                append_to_file(DID_NOT_FOLLOW_GUIDELINES_FILE, user_id)
                continue
            print(e)
            count_failed += 1
            unpack_failed_users.append(user_id)
    print("Total failed to unpack count: %d" % count_failed)
    print("Unpacking failed for users: ", unpack_failed_users)
Exemple #2
0
    def extract_all(self, path):
        """
        Extract recursively all archives under the path directory and
        copy all relevant files to the path directory

        Args:
            path (str): Path to the directory
        """
        for root, dirs, files in os.walk(path):
            for f in files:
                f_path = os.path.join(root, f)
                # Unpack non-empty, non-symlink archives next to themselves.
                if (not os.path.islink(f_path) and self._is_archive(f_path)
                        and os.path.getsize(f_path) != 0):
                    dst_path = os.path.splitext(f_path)[0]
                    print('==== Unpack the file %s ====' % f_path)
                    pyunpack.Archive(f_path).extractall(dst_path,
                                                        auto_create_dir=True)
                    f_path = dst_path
                    # The freshly extracted directory is not in the current
                    # os.walk() snapshot of `dirs`, so recurse explicitly.
                    if os.path.isdir(f_path):
                        self.extract_all(f_path)
                if os.path.isfile(f_path) and self._is_relevant_file(f_path):
                    dst_path = os.path.join(self.dst, os.path.basename(f_path))
                    if self._is_host_log(f_path):
                        dst_path = self._generate_host_log_name(f_path)
                    if f_path != dst_path and not self._is_archive(f_path):
                        shutil.copy(f_path, dst_path)
            # BUG FIX: the old trailing `for dir_name in dirs:
            # self.extract_all(dir_name)` passed a bare directory NAME
            # (resolved against the CWD, not `root`) and was redundant
            # anyway, since os.walk() already descends into every
            # subdirectory. Removed.
Exemple #3
0
def unrar(filepath, rm_archive):
    """Extract *filepath* into a sibling folder named after the archive.

    Deletes the archive afterwards when *rm_archive* is truthy and returns
    the folder path.
    """
    target_dir, _ = os.path.splitext(filepath)
    try_makedirs(target_dir)
    pyunpack.Archive(filepath).extractall(target_dir)
    if rm_archive:
        os.remove(filepath)
    return target_dir
Exemple #4
0
    def _install_anime4kcpp(self):
        """ Install Anime4KCPP

        Downloads the latest Win64 MSVC CLI build of Anime4KCPP via the
        GitHub releases API and extracts it under LOCALAPPDATA/video2x.
        """
        print('\nInstalling Anime4KCPP')

        import pyunpack
        import requests

        # get latest release of Anime4KCPP via Github API
        # at the time of writing this portion, Anime4KCPP doesn't yet have a stable release
        # therefore releases/latest won't work
        latest_release = requests.get(
            'https://api.github.com/repos/TianZerL/Anime4KCPP/releases/latest'
        ).json()

        anime4kcpp_zip = None
        for a in latest_release['assets']:
            if re.search(r'Anime4KCPP_CLI-.*-Win64-msvc\.7z',
                         a['browser_download_url']):
                anime4kcpp_zip = download(a['browser_download_url'],
                                          tempfile.gettempdir())
                self.trash.append(anime4kcpp_zip)

        # BUG FIX: previously, when no asset matched the pattern, the code
        # fell through to an opaque NameError on anime4kcpp_zip below.
        if anime4kcpp_zip is None:
            raise RuntimeError(
                'no Anime4KCPP_CLI Win64 msvc asset found in latest release')

        # extract and rename
        # with py7zr.SevenZipFile(anime4kcpp_zip, mode='r') as archive:
        (LOCALAPPDATA / 'video2x' / 'anime4kcpp').mkdir(parents=True,
                                                        exist_ok=True)
        pyunpack.Archive(anime4kcpp_zip).extractall(LOCALAPPDATA / 'video2x' /
                                                    'anime4kcpp')
Exemple #5
0
    def unrar(self, filepath):
        """must return the filepath of the extracted file"""
        # Candidate archive parts: any entry whose name contains an "r"
        # (catches .rar and split volumes such as .r00).
        # NOTE(review): this glob also matches unrelated names containing
        # "r" — verify against actual directory contents.
        potentials = glob.glob(os.path.join(filepath, "*r*"))
        arcpath = potentials[0]  # fallback; raises IndexError when empty
        for p in potentials:
            ext = p.split(".")[-1]
            if ext == "rar":
                arcpath = p  # prefer an explicit .rar over the fallback

        # Snapshot the directory so the newly extracted entry can be found
        # by set difference after extraction.
        dir_contents = os.listdir(filepath)

        archive = pyunpack.Archive(arcpath)
        archive.extractall(filepath)

        new_contents = os.listdir(filepath)
        fn = None
        for c in new_contents:
            if c not in dir_contents:
                fn = c

        # Rename the extracted file to <dir basename>.<original extension>.
        # NOTE(review): if extraction produced no new entry, fn stays None
        # and fn.split() raises AttributeError — confirm that cannot happen.
        renameTo = "%s.%s" % (os.path.basename(filepath), fn.split(".")[-1])
        os.rename(os.path.join(filepath, fn), os.path.join(filepath, renameTo))

        # archive = UnRAR2.RarFile(arcpath)
        # fn = archive.infolist()[0].filename
        # renameTo = "%s.%s" % (os.path.basename(filepath), fn.split(".")[-1])
        # archive.extract([0], filepath)
        # os.rename(os.path.join(filepath, fn), os.path.join(filepath, renameTo))
        return os.path.join(filepath, renameTo)
Exemple #6
0
def unzip(zip_path, output_file, data_folder):
    """Extract *zip_path* into *data_folder*, then verify that the expected
    *output_file* exists; raise ValueError otherwise."""
    print("Unzipping file: {}".format(zip_path))
    pyunpack.Archive(zip_path).extractall(data_folder)

    if os.path.exists(output_file):
        return
    raise ValueError(
        "Error in unzipping process! {} not found.".format(output_file))
def extract_submission(arch_path, dest_dir):
    """raise InvalidSubmission if submission is malformed"""
    arc.Archive(arch_path, backend="patool").extractall(dest_dir)
    sources_dir = ""
    sources_list_file = ""

    for dirpath, dirnames, filenames in os.walk(dest_dir):
        # Skip macOS resource-fork folders.
        if fs.basename(dirpath) == "__MACOSX":
            continue

        for candidate in dirnames:
            if not _skip_file(candidate) and "sources" in candidate.lower():
                sources_dir = fs.join(dirpath, candidate)

        for candidate in filenames:
            if not _skip_file(candidate) and "sources_list" in candidate.lower():
                sources_list_file = fs.join(dirpath, candidate)
                break

    if not sources_dir:
        raise InvalidSubmission("Не удалось обнаружить папку sources")

    if not sources_list_file:
        raise InvalidSubmission("Не удалось обнаружить файл sources_list.xlsx")

    return sources_dir, sources_list_file
Exemple #8
0
def download():
    """Fetch and extract the Sport8 event dataset plus its train/test split
    CSVs, returning (dataset_dir, meta_dir)."""
    DOWNLOAD_URL = 'http://vision.stanford.edu/lijiali/event_dataset/event_dataset.rar'

    # Prepare sport8/ and sport8/meta/ under the data root.
    sport8 = utils.full_path(os.path.join(dataroot, 'sport8'))
    meta = utils.full_path(os.path.join(sport8, 'meta'))
    for needed_dir in (sport8, meta):
        os.makedirs(needed_dir, exist_ok=True)

    # Download the archive into the shared downloads dir unless cached.
    dir_downloads = utils.dir_downloads()
    filename = os.path.basename(DOWNLOAD_URL)
    archive = os.path.join(dir_downloads, filename)
    if not os.path.isfile(archive):
        tvutils.download_url(DOWNLOAD_URL, dir_downloads, filename)
    print(f"Extracting {archive} to {sport8}")
    pyunpack.Archive(archive).extractall(sport8)

    # download the csv files for the train and test split
    # from 'NAS Evaluation is Frustrating' repo
    # note that download_url doesn't work in vscode debug mode
    test_file_url = 'https://raw.githubusercontent.com/antoyang/NAS-Benchmark/master/data/Sport8_test.csv'
    train_file_url = 'https://raw.githubusercontent.com/antoyang/NAS-Benchmark/master/data/Sport8_train.csv'
    for csv_url in (test_file_url, train_file_url):
        tvutils.download_url(csv_url, meta, filename=None, md5=None)

    return sport8, meta
def process_archive(archivepath, archivename, rootarchive=''):
    """Extract one archive into a temp folder, recurse into its contents
    via process_folder(), then remove the temp folder.

    Returns True when extraction and processing succeeded, False on any of
    the known extraction failures (each is printed, never raised).
    """
    OK = False
    print("Processing", archivepath, archivename)
    try:
        pyunpack.Archive(os.path.join(archivepath, archivename)).extractall(
            os.path.join(tmpdir, archivename), auto_create_dir=True)
        process_folder(os.path.join(tmpdir, archivename),
                       rootarchive=rootarchive + archivename + ' [Z] ')
        OK = True
    except pyunpack.PatoolError:
        print("ERROR: PATOOL ERROR, UNABLE TO UNZIP", rootarchive, archivepath,
              archivename)
        OK = False
    except IOError:
        print("ERROR: IOERROR, UNABLE TO UNZIP", rootarchive, archivepath,
              archivename)
        OK = False
    except zipfile.BadZipfile:
        OK = False
        print("ERROR: zipfile.BadZipfile", rootarchive, archivepath,
              archivename)
    except ValueError:
        OK = False
        print("ERROR: ValueError, archive does not exist!", rootarchive,
              archivepath, archivename)
    # Best-effort cleanup of the temp extraction folder.
    try:
        shutil.rmtree(os.path.join(tmpdir, archivename))
    except OSError:
        # BUG FIX: `except WindowsError` raises NameError on non-Windows
        # Python 3; WindowsError is an alias of OSError on Windows, so
        # OSError covers the same failures portably.
        print("WARNING: Unable to delete folder",
              os.path.join(tmpdir, archivename))
    return OK
Exemple #10
0
def extractcars():
    """Extract the first archive found in ``modcars`` into ``accars``,
    deleting the archive afterwards.

    Supports .7z (py7zr), .zip (zipfile) and .rar (pyunpack); other
    extensions only print an error.
    """
    try:
        firstfile = os.listdir(modcars)[0]
    except IndexError:
        return  # nothing queued for extraction

    # BUG FIX: os.path.join is separator-safe; the old `modcars + firstfile`
    # concatenation silently produced a wrong path whenever modcars lacked a
    # trailing separator.
    pathtozip = os.path.join(modcars, firstfile)

    if firstfile.endswith('.7z'):
        with py7zr.SevenZipFile(pathtozip, mode='r') as archive:
            archive.extractall(path=accars)
        _remove_extracted_archive(pathtozip)
    elif firstfile.endswith('.zip'):
        with ZipFile(pathtozip, 'r') as zf:
            zf.extractall(accars)
        _remove_extracted_archive(pathtozip)
    elif firstfile.endswith('.rar'):
        pyunpack.Archive(pathtozip).extractall(accars)
        _remove_extracted_archive(pathtozip)
    else:
        print("Extract Car Function Error")


def _remove_extracted_archive(pathtozip):
    """Delete an already-extracted archive, warning if it disappeared."""
    if os.path.exists(path=pathtozip):
        os.remove(path=pathtozip)
    else:
        print("The file doesn't exist")
Exemple #11
0
    def _uncompress_general(self, filename, filehash):
        """
        Uncompresses the file and saves to the destination directory

        Returns the destination directory on success; appends a failure
        entry to the log file and returns False on any error.
        """
        try:
            out_dir = os.path.join(os.path.dirname(filename),
                                   "Uncompressed",
                                   os.path.basename(filename))
            if not os.path.isdir(out_dir):
                os.makedirs(out_dir)

            pyunpack.Archive(filename).extractall(out_dir)

            with open(self.log_file, 'a',
                      encoding=self.system_encoding) as log:
                log.write("[{} Success]{}\n".format(
                    self._uncompress_general.__name__, filename))
            return out_dir

        except Exception as exc:
            # Best-effort: record the failure instead of propagating it.
            with open(self.log_file, 'a',
                      encoding=self.system_encoding) as log:
                log.write("[{} Failed]{} --- {}\n".format(
                    self._uncompress_general.__name__, filename, str(exc)))
            return False
Exemple #12
0
def download_glove(data_dir=data_dir):
    """Fetch the GloVe 6B embeddings archive into *data_dir* and unpack it,
    skipping everything when the zip is already present."""
    archive_path = data_dir.joinpath("glove6B.zip")
    if archive_path.exists():
        return
    downloader("http://nlp.stanford.edu/data/glove.6B.zip", archive_path)
    pyunpack.Archive(archive_path).extractall(data_dir)
Exemple #13
0
 def execute(self):
     """Unpack the wrapped package archive into the destination directory.

     Returns True so the surrounding pipeline marks the step as done.
     """
     self.output(
         'Extract %s to %s' % (self.__package.path(), self.__destination),
         'Extract %s' % self.__package.path())
     import pyunpack
     source_archive = str(self.__package.path(absolute=True))
     pyunpack.Archive(source_archive).extractall(str(self.__destination))
     return True
Exemple #14
0
def unzip(zip_path, output_file, data_folder):
    """Unzips files and checks successful completion."""

    print('Unzipping file: {}'.format(zip_path))
    pyunpack.Archive(zip_path).extractall(data_folder)

    # A missing expected output file means the extraction failed.
    if os.path.exists(output_file):
        return
    raise ValueError(
        'Error in unzipping process! {} not found.'.format(output_file))
    def rar():
        """Interactively extract .rar archives in place.

        Prompts for a directory, then either unpacks every .rar file in it
        ('d') or one named file ('f'); anything else returns an error
        string.
        """
        directory = input('directory: ')

        mode = input('File or whole directory? f/d: ')
        if mode in ('d', 'D'):
            for entry in os.listdir(os.fsencode(directory)):
                filename = os.fsdecode(entry)
                if filename.endswith(".rar"):
                    # BUG FIX: os.path.join is portable; the old
                    # directory + '\\' + filename only worked on Windows.
                    pyunpack.Archive(
                        os.path.join(directory, filename)).extractall(directory)

        elif mode in ('f', 'F'):
            filename = input('filename')
            try:
                pyunpack.Archive(
                    os.path.join(directory, filename)).extractall(directory)
            except Exception:
                # Narrowed from a bare except, which also swallowed
                # KeyboardInterrupt / SystemExit.
                print('error: file probably not .rar')
        else:
            return('error: not proper response')
        print('decoding complete')
def extractSubFiles(folder_path):
    """Unpack every .rar archive directly inside *folder_path* into the
    same folder, deleting each archive after extraction."""
    for entry in os.listdir(folder_path):
        folder_name = entry.split('.')[0]
        if '.rar' not in entry:
            continue
        print(f"File: {entry} Folder: {folder_name}")
        archive_path = os.path.join(folder_path, entry)
        pyunpack.Archive(archive_path).extractall(os.path.join(folder_path))
        # Delete the .rar once its contents have been extracted.
        os.remove(archive_path)
Exemple #17
0
 def _extract(self):
     """Unpack self.archive into self.install_dir via an explicitly located
     patool executable, logging progress along the way."""
     LOGGER.info(
         f'{self.__class__.__name__}: extracting (this may take a while)')
     patool_path = str(_find_patool())
     pyunpack.Archive(self.archive).extractall(directory=self.install_dir,
                                               auto_create_dir=True,
                                               patool_path=patool_path)
     LOGGER.debug(f'{self.__class__.__name__}: removing archive')
     # Deliberately disabled: the archive is kept on disk.
     # self.archive.unlink()
     LOGGER.info(f'{self.__class__.__name__}: successfully extracted')
Exemple #18
0
    def extract(self, archive: str) -> str:
        """Extract *archive* into a working subdirectory named after the
        archive file and return that directory's path."""
        super().extract(archive)

        target = self.working_dir + '/' + os.path.basename(archive)
        if not os.path.exists(target):
            os.makedirs(target)

        pyunpack.Archive(archive).extractall(target, auto_create_dir=True)
        return target
Exemple #19
0
def _extract_step(archive_path, folder, subfolder):
    """Unpack *archive_path* into folder/subfolder (created on demand) and
    return (new_folder, name of the single file now inside it)."""
    folder += '/' + subfolder
    pyunpack.Archive(archive_path).extractall(folder, auto_create_dir=True)
    return folder, get_regular_filename(folder)


def main():
    """Solve the hacker.org nested-archive puzzle: the payload is wrapped
    in rar/arj/cab/hqx/sitx/gz/bz2/7z layers; unwrap each layer in turn
    and print the final text file."""
    folder = tempfile.gettempdir()

    local_filename = urllib.request.urlretrieve(
        'http://www.hacker.org/challenge/misc/file.compressed')[0]

    # First layers handled by pyunpack (previously eight near-identical
    # copy-pasted stanzas — now one helper).
    folder, filename = _extract_step(local_filename, folder, 'rar')
    for layer in ('arj', 'cab'):
        folder, filename = _extract_step(
            os.path.join(folder, filename), folder, layer)

    # BinHex layer: decoded with the stdlib binhex module.
    # NOTE(review): binhex was removed in Python 3.13 — confirm runtime.
    hqx_path = os.path.join(folder, filename)
    folder += '/sitx'
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
    binhex.hexbin(hqx_path, None)
    sitx_filename = get_regular_filename(folder)

    # StuffIt layer: shell out to the `unar` tool.
    sitx_path = os.path.join(folder, sitx_filename)
    folder += '/gz'
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
    subprocess.run(['unar', '-f', sitx_path], stdout=open(os.devnull, 'w'))
    filename = get_regular_filename(folder)

    # Remaining pyunpack-friendly layers down to the plain text payload.
    for layer in ('bz2', '7z', 'txt'):
        folder, filename = _extract_step(
            os.path.join(folder, filename), folder, layer)

    print(open(os.path.join(folder, filename)).read())
Exemple #20
0
def download(s: Session, homework: dict, directory: Path):
    """Download every student's submission for one homework item from
    learn.tsinghua.edu.cn, then unpack archive submissions in place.

    Args:
        s: authenticated Session for learn.tsinghua.edu.cn
        homework: dict read for keys 'zyid', 'wlkcid' (presumably homework
            and course ids — confirm) and 'bt' (used as the folder name)
        directory: base output directory; files land in directory / bt
    """
    # DataTables-style request payload expected by the getDoneInfo
    # endpoint; only the zyid/wlkcid values vary between calls.
    data = '[{"name":"sEcho","value":1},{"name":"iColumns","value":12},{"name":"sColumns","value":",,,,,,,,,,,"},'\
           '{"name":"iDisplayStart","value":0},{"name":"iDisplayLength","value":"-1"},{"name":"mDataProp_0",'\
           '"value":"function"},{"name":"bSortable_0","value":false},{"name":"mDataProp_1","value":"qzmc"},'\
           '{"name":"bSortable_1","value":true},{"name":"mDataProp_2","value":"xh"},{"name":"bSortable_2",'\
           '"value":true},{"name":"mDataProp_3","value":"xm"},{"name":"bSortable_3","value":true},'\
           '{"name":"mDataProp_4","value":"dwmc"},{"name":"bSortable_4","value":false},'\
           '{"name":"mDataProp_5","value":"bm"},{"name":"bSortable_5","value":false},'\
           '{"name":"mDataProp_6","value":"xzsj"},{"name":"bSortable_6","value":true},'\
           '{"name":"mDataProp_7","value":"scsjStr"},{"name":"bSortable_7","value":false},'\
           '{"name":"mDataProp_8","value":"pyzt"},{"name":"bSortable_8","value":true},'\
           '{"name":"mDataProp_9","value":"cj"},{"name":"bSortable_9","value":true},'\
           '{"name":"mDataProp_10","value":"jsm"},{"name":"bSortable_10","value":true},'\
           '{"name":"mDataProp_11","value":"function"},{"name":"bSortable_11","value":false},'\
           '{"name":"iSortCol_0","value":2},{"name":"sSortDir_0","value":"asc"},{"name":"iSortingCols","value":1},'\
           '{"name":"zyid","value":"%s"},{"name":"wlkcid","value":"%s"}]' % (
               homework['zyid'], homework['wlkcid'])
    url = 'https://learn.tsinghua.edu.cn/b/wlxt/kczy/xszy/teacher/getDoneInfo'
    students = s.post(url, data={'aoData': data}).json()['object']['aaData']
    directory = directory / homework['bt']
    directory.mkdir(parents=True, exist_ok=True)
    for student in tqdm(students):
        base_url = 'https://learn.tsinghua.edu.cn/b/wlxt/kczy/xszy/teacher/downloadFile'
        # No attachment id -> nothing to download for this student.
        if not student['zyfjid']:
            continue
        url = f'{base_url}/{homework["wlkcid"]}/{student["zyfjid"]}'
        # HEAD first: fetch filename and size without downloading the body.
        headers = s.head(url).headers
        raw_filename = re.search('filename="(.*?)"',
                                 headers['Content-Disposition']).group(1)
        suffix = Path(raw_filename).suffix
        # Saved as <student number (xh)>-<student name (xm)><suffix>.
        filename = f'{student["xh"]}-{student["xm"]}' + suffix
        path = directory / filename
        size = int(headers['Content-Length'])
        # Skip files already fully downloaded (same size on disk).
        if path.is_file() and path.stat().st_size == size:
            sleep(0.01)
            continue
        response = s.get(url, stream=True)
        assert response.status_code == 200
        with open(path, 'wb') as file:
            for chunk in tqdm(response.iter_content(32768)):
                if chunk:
                    file.write(chunk)
        # Try to unpack common archive formats next to the download;
        # extraction failures are reported but do not abort the loop.
        if path.suffix in ['.rar', '.zip', '.7z']:
            arch = pyunpack.Archive(path)
            extract_to = directory / path.stem
            extract_to.mkdir(exist_ok=True)
            try:
                arch.extractall(directory=extract_to)
            except Exception as e:
                print(type(e).mro()[0], *e.args)
                print('Failed to extract', path)
        else:
            print('无法识别压缩文件', path)
Exemple #21
0
def unzip(zip_path, output_file, data_folder, use_z=False):
    """Unzips files and checks successful completion."""

    print("Unzipping file: {}".format(zip_path))
    # 7z archives go through py7zr; everything else through pyunpack.
    if use_z:
        extractor = py7zr.SevenZipFile(zip_path, mode="r")
        extractor.extractall(path=data_folder)
    else:
        pyunpack.Archive(zip_path).extractall(data_folder)

    # A missing expected output file means the extraction failed.
    if os.path.exists(output_file):
        return
    raise ValueError(
        "Error in unzipping process! {} not found.".format(output_file))
Exemple #22
0
def get_page_info(file, page_size, page):
    """
    Compute pagination info for the images inside an archive.
    @param file The archive file path
    @param page_size Entries per page (None -> 9999)
    @param page 1-based page number (None -> 1)
    @returns begin_page, end_page, total_pages
    """
    if page is None:
        page = 1
    if page_size is None:
        page_size = 9999
    begin_page = (page - 1) * page_size
    end_page = page * page_size
    total_pages = 0
    if zipfile.is_zipfile(file):
        # `with` guarantees the zip is closed even if infolist() raises.
        with zipfile.ZipFile(file, 'r') as zip_file:
            file_list = zip_file.infolist()
            if end_page > len(file_list):
                end_page = len(file_list)
            # Directory entries do not count as pages.
            total_pages = sum(1 for entry in file_list if not entry.is_dir())
    elif rarfile.is_rarfile(file):
        rar_file = rarfile.RarFile(file, 'r')
        file_list = rar_file.infolist()
        if end_page > len(file_list):
            end_page = len(file_list)
        total_pages = sum(1 for entry in file_list if not entry.isdir())
        rar_file.close()
    else:
        # Fallback: unpack into a temp dir and count files on disk.
        tempdir = tempfile.mkdtemp()
        try:
            archive_file = pyunpack.Archive(file)
            archive_file.extractall(tempdir)
            extracted = []
            for root, dirs, names in walk(tempdir):
                for name in names:
                    extracted.append(root + "/" + name)
            extracted.sort()
            # BUG FIX: this branch previously clamped end_page against
            # `file_list`, a name defined only in the zip/rar branches
            # (NameError here); clamp against the extracted list instead.
            if end_page > len(extracted):
                end_page = len(extracted)
            total_pages = len(extracted)
        except Exception as e:
            print(e)
        finally:
            # delete temp files
            shutil.rmtree(tempdir)
    return begin_page, end_page, total_pages
def unpack(_file, path):
    """Extract *_file* into *path* and collect the paths of all files that
    live in leaf directories of the extraction tree.

    Returns (error, paths, extension, file_name): error is False with the
    collected data on success, True with None placeholders on failure.
    """
    paths = []
    parts = _file.split(".")
    # Treat ".tar.gz" as one compound extension.
    ext = parts[-1] if parts[-1] != "gz" and parts[-2] != "tar" else "tar.gz"
    # BUG FIX: compute file_name before the try block — it was previously
    # assigned only after a successful extraction, so the except path
    # raised NameError instead of returning the error tuple.
    file_name = _file.split("/")[-1]
    try:
        pyunpack.Archive(_file).extractall(path)
        # Collect files from leaf directories only (no subdirectories).
        for dirpath, dirnames, filenames in os.walk(path):
            if len(dirnames) == 0 and len(filenames) > 0:
                for name in filenames:
                    paths.append(os.path.join(dirpath, name))
        os.unlink(_file)
        return (False, paths, ext, file_name)
    except Exception:
        # Kept broad on purpose: any unpack failure maps to the error tuple.
        return (True, None, None, file_name)
Exemple #24
0
def unpack_game(game, removezip=False):
    """Extract game.zip into GAMES/<source>/<name>.<id>, record the target
    directory on the game object and persist it via db.save_game.

    Args:
        game: record exposing .zip, .source, .name, .id and a writable .dir
        removezip: delete the source archive after unpacking

    Returns:
        True once the game has been unpacked and saved.
    """
    # NOTE(review): '/' binds tighter than '+', so this evaluates as
    # (config.GAMES / source / name) + '.' + str(id) — assumes config.GAMES
    # yields a path type supporting both '/' and '+' with strings (stdlib
    # pathlib does not support '+'); confirm the path class used here.
    targetDir = config.GAMES / game.source / convertToFileName(
        game.name) + '.' + str(game.id)

    # Python 2 print statement; the trailing comma suppresses the newline
    # so "OK" lands on the same line after extraction.
    print 'unpacking %s...' % game.name,
    sys.stdout.flush()

    pyunpack.Archive(game.zip).extractall(targetDir, auto_create_dir=True)
    #    if msg:
    #        return False
    print 'OK'
    if removezip:
        os.remove(game.zip)
    game.dir = targetDir
    db.save_game(game)
    return True
Exemple #25
0
def create(input_rgb=None, input_depth=None):
    """Build an ESANet RGB-D segmentation model (40 classes) and load
    pretrained NYUv2 weights downloaded from the ESANet release.

    Args:
        input_rgb: optional RGB input tensor; a (None, None, 3) Input is
            created when omitted.
        input_depth: optional depth input tensor; a (None, None, 1) Input
            is created when omitted. Must be given iff input_rgb is given.

    Returns:
        A tf.keras.Model when the inputs were created here, otherwise the
        softmax output tensor wired onto the caller's inputs.

    Raises:
        ValueError: if exactly one of input_rgb/input_depth is supplied.
    """
    if (input_rgb is None) != (input_depth is None):
        raise ValueError(
            "Either both or neither of input_rgb and input_depth should be given"
        )
    return_model = input_rgb is None
    if input_rgb is None:
        input_rgb = tf.keras.layers.Input((None, None, 3))
        input_depth = tf.keras.layers.Input((None, None, 1))

    # Create model
    x = esanet.esanet(input_rgb,
                      input_depth,
                      classes=40,
                      num_residual_units=[3, 4, 6, 3],
                      filters=[64, 128, 256, 512],
                      dilation_rates=[1, 1, 1, 1],
                      strides=[1, 2, 2, 2],
                      psp_bin_sizes=[1, 5],
                      config=config)
    x = tf.keras.layers.Softmax()(x)
    model = tf.keras.Model(inputs=[input_rgb, input_depth], outputs=[x])

    # Fix batchnorm epsilons
    # Overrides the Keras default on the residual-unit norm layers —
    # presumably to match the epsilon the checkpoint was trained with;
    # confirm against the ESANet training config.
    for layer in model.layers:
        if layer.name.endswith("/1/norm") or layer.name.endswith("/2/norm"):
            layer.epsilon = 1e-3
    model = tf.keras.Model(inputs=model.inputs, outputs=[model.output])

    # https://github.com/TUI-NICR/ESANet
    # Download the pretrained-weights archive into ~/.keras and unpack it
    # once (skipped when the .pth file already exists).
    download_file = os.path.join(os.path.expanduser("~"), ".keras",
                                 "nyuv2_r34_NBt1D_scenenet.tar.gz")
    gdd.download_file_from_google_drive(
        file_id="1w_Qa8AWUC6uHzQamwu-PAqA7P00hgl8w", dest_path=download_file)
    weights_uncompressed = os.path.join(os.path.dirname(download_file),
                                        "nyuv2", "r34_NBt1D_scenenet.pth")
    if not os.path.isfile(weights_uncompressed):
        pyunpack.Archive(download_file).extractall(
            os.path.dirname(download_file))

    # Map the PyTorch parameters onto the Keras layers, skipping the
    # auxiliary side-output heads.
    tfcv.model.pretrained.weights.load_pth(
        weights_uncompressed,
        model,
        convert_name,
        ignore=lambda name: "side_output" in name)

    return model if return_model else x
Exemple #26
0
def load_wiki_attacks(data_dir=data_dir):
    """Fetch the wiki attack-comments dataset if missing and return it as a
    DataFrame with a whitespace-normalized `comment_text` column."""
    if not data_dir.exists():
        data_dir.mkdir(parents=True)
    if not data_dir.joinpath("text_data").exists():
        archive_url = "https://activelearning.blob.core.windows.net/activelearningdemo/text_data.zip"
        archive_target = str(data_dir / "text_data.zip")
        downloader(archive_url, archive_target)
        pyunpack.Archive(archive_target).extractall(data_dir)

    csv_path = str(data_dir / "text_data" / "attack_data.csv")
    toxic_df = pd.read_csv(csv_path, encoding="ISO-8859-1")
    # Collapse NEWLINE_TOKEN markers and non-alphanumeric runs into spaces.
    toxic_df["comment_text"] = toxic_df.comment.replace(
        r'NEWLINE_TOKEN|[^.,A-Za-z0-9]+', ' ', regex=True)
    return toxic_df
Exemple #27
0
    def fetch(self) -> bytes:
        """Download self.url and return its payload bytes, transparently
        unwrapping (possibly multi-layer) compression.

        Returns False when the HTTP request itself fails.
        """
        try:
            response = requests.get(self.url)
        except requests.RequestException as e:
            # BUG FIX: "[!]" + e raised TypeError (str + exception object).
            colors.print_error("[!]" + str(e))
            return False

        # Get response
        data = response.content

        # Check if data is compressed
        if is_compressed(data):
            colors.print_info("[-] Decompressing %s" % self.url)
            # Write to temporary file the response
            if not os.path.exists(temp_dir):
                os.mkdir(temp_dir)

            temp_filename = temp_dir + "tempfile"
            # Sadly we need to write it to a file because pyunpack can't yet
            # decompress from binary data directly from memory
            with open(temp_filename, "wb") as temp_file:
                temp_file.write(data)

            # Decompress
            filename = temp_filename
            archive_dir = temp_dir + "archive/"

            if not os.path.exists(archive_dir):
                os.mkdir(archive_dir)

            # Sometimes it's compressed multiple times
            while True:
                pyunpack.Archive(filename).extractall(archive_dir)
                os.remove(filename)
                filename = archive_dir + os.listdir(archive_dir)[0]
                if not is_file_compressed(filename):
                    break

            # BUG FIX: the final read used to run even for uncompressed
            # responses, where `filename` is unbound (NameError). It now
            # only runs after decompression; plain responses return the
            # raw bytes directly.
            with open(filename, "rb") as temp_file:
                data = bytes(temp_file.read())
            os.remove(filename)

        return data
Exemple #28
0
    def _extract_archive(self,
                         archive_path,
                         unzip_dir='.',
                         archive_format='auto'):
        """Extract *archive_path* into *unzip_dir*.

        archive_format may be None (no-op), 'auto' (try tar, then zip,
        then a pyunpack fallback), a single format name, or a list.

        Returns:
            True when some extractor handled the archive, else False.
        """
        if archive_format is None:
            return False
        if archive_format == 'auto':
            archive_format = ['tar', 'zip', 'rar']
        if isinstance(archive_format, six.string_types):
            archive_format = [archive_format]

        is_match_fn = None
        open_fn = None

        # Formats are tried in list order; tar/zip are verified with their
        # is_*file checks before extraction.
        for archive_type in archive_format:
            if archive_type == 'tar':
                open_fn = tarfile.open
                is_match_fn = tarfile.is_tarfile

            if archive_type == 'zip':
                open_fn = zipfile.ZipFile
                is_match_fn = zipfile.is_zipfile

            if archive_type == 'rar':
                # Unconditional fallback: pyunpack/patool handles whatever
                # is left. Extracts into the PARENT of unzip_dir.
                # NOTE(review): the '\\' split assumes Windows-style paths —
                # confirm before running on POSIX systems.
                archive = pyunpack.Archive(archive_path)
                archive.extractall('\\'.join(unzip_dir.split('\\')[:-1]))
                return True

            if is_match_fn(archive_path):
                with open_fn(archive_path) as archive:
                    try:
                        archive.extractall(unzip_dir)
                    except (tarfile.TarError, RuntimeError, KeyboardInterrupt):
                        # Remove a partially written unzip_dir, then re-raise.
                        if os.path.exists(unzip_dir):
                            if os.path.isfile(unzip_dir):
                                os.remove(unzip_dir)
                            else:
                                shutil.rmtree(unzip_dir)
                        raise
                return True

        return False
Exemple #29
0
def create_x(input, dilate, resnet_v1_x, url):
    """Build a ResNet-v1 backbone, load converted checkpoint weights
    downloaded from *url*, and return either the model (when the input
    tensor was created here) or the output tensor."""
    return_model = input is None
    if return_model:
        input = tf.keras.layers.Input((None, None, 3))

    features = resnet_v1_x(input, dilate=dilate, stem="b", config=config)
    model = tf.keras.Model(inputs=[input], outputs=[features])

    # Download the "*_2016_08_28.tar.gz" release archive and unpack the
    # .ckpt next to it (skipped when already extracted).
    weights_compressed = tf.keras.utils.get_file(url.split("/")[-1], url)
    suffix_len = len("_2016_08_28.tar.gz")
    weights_uncompressed = weights_compressed[:-suffix_len] + ".ckpt"
    if not os.path.isfile(weights_uncompressed):
        pyunpack.Archive(weights_compressed).extractall(
            os.path.dirname(weights_compressed))
    tfcv.model.pretrained.weights.load_ckpt(weights_uncompressed, model,
                                            convert_name)

    return model if return_model else features
Exemple #30
0
def extract_kaggle_archive_to_local_path(local_archive_path, local_fname):
    """Extract a downloaded Kaggle archive into LOCAL_DATA_PATH, flattening
    the extra directory nesting of 'v2' archives, then delete the archive."""
    # Annoyingly we have to special case Kaggle files that have the string 'v2' in them.
    is_v2_archive = 'v2' in local_archive_path

    print("Extracting {} to {}...".format(local_archive_path, local_fname))
    pyunpack.Archive(local_archive_path).extractall(LOCAL_DATA_PATH)

    if is_v2_archive:
        # v2 archives unpack into a nested directory: move the payload up,
        # then drop the now-empty wrapper directories.
        extract_dir = os.path.join(LOCAL_DATA_PATH,
                                   KAGGLE_V2_ARCHIVE_BASE_PATH)
        os.rename(os.path.join(extract_dir, local_fname),
                  os.path.join(LOCAL_DATA_PATH, local_fname))
        print("Removing needless extracted directories...")
        os.rmdir(extract_dir)
        os.rmdir(os.path.join(LOCAL_DATA_PATH, 'data'))
        print("Done removing those needless directories!")

    print("Done with extraction, removing {}...".format(local_archive_path))
    os.remove(local_archive_path)
    print("All done extracting!")