def file_is_available(fso_path:str, interval:int=1) -> bool:
    """Report whether a file hashes identically before and after a pause.

    The file is hashed, the call sleeps for ``interval`` seconds, and the
    file is hashed again.

    :param fso_path: path handed to ``hashfile``.
    :param interval: seconds to sleep between the two hashes.
    :return: ``True`` when both hashes are equal; ``False`` when they differ
        or when hashing raised an error.
    """
    try:
        prelim_hash = hashfile(fso_path)
        time.sleep(interval)
        secondary_hash = hashfile(fso_path)
    except Exception:
        # Best-effort probe: any hashing failure means "not available".
        # Unlike a bare ``except``, this lets KeyboardInterrupt/SystemExit
        # raised during the sleep propagate to the caller.
        return False

    return prelim_hash == secondary_hash
def dir_is_available(fso_path:str, interval:float=.5) -> bool:
    """Report whether a directory's file hashes are identical across a pause.

    Every path yielded by ``walk_dir(fso_path)`` is hashed, the call sleeps
    for ``interval`` seconds, and the walk-and-hash pass is repeated.

    :param fso_path: directory path handed to ``walk_dir``.
    :param interval: seconds to sleep between the two passes.
    :return: ``True`` when both hash lists are equal (same hashes in the
        same order); ``False`` when they differ or when walking/hashing
        raised an error.
    """
    try:
        prelim_hash = [hashfile(file) for file in walk_dir(fso_path)]
        time.sleep(interval)
        secondary_hash = [hashfile(file) for file in walk_dir(fso_path)]
    except Exception:
        # Best-effort probe: any failure means "not available".  Unlike a
        # bare ``except``, KeyboardInterrupt/SystemExit still propagate.
        return False

    return prelim_hash == secondary_hash
Beispiel #3
0
def quick_hash(path):
    """Return a sampled imohash digest for a file or a directory tree.

    ``path`` is user-expanded first (``~`` handling).  A non-directory is
    hashed directly with ``imohash.hashfile``.  For a directory, every
    regular file found by a recursive glob (in sorted order) contributes a
    ``(relative path, hex digest)`` pair; the pairs are passed to
    ``pyfra.remote._hash_obs`` and the first 32 items of its result are
    returned.
    """
    hash_kwargs = dict(
        hexdigest=True,
        sample_size=4 * 1024**2,  # 4 MB
        sample_threshhold=16 * 1024**2,  # 16 MB
    )
    path = os.path.expanduser(path)
    root = pathlib.Path(path)

    if not root.is_dir():
        return imohash.hashfile(path, **hash_kwargs)

    pairs = []
    for child in sorted(root.glob('**/*')):
        if not child.is_file():
            continue
        relative_name = str(child.relative_to(root))
        digest = imohash.hashfile(str(child.resolve()), **hash_kwargs)
        pairs.append((relative_name, digest))

    return pyfra.remote._hash_obs(*pairs)[:32]
Beispiel #4
0
 def wrapped(complex_filepath, pred_filepath):
     """Produce ``pred_filepath`` for ``complex_filepath``, reusing a memoized result.

     The input file's hash is looked up in ``memo``.  When a previously
     recorded prediction path exists on disk it is copied to
     ``pred_filepath`` (after asserting both files have the same line
     count); otherwise ``simplifier`` is run.  In both cases ``memo`` ends
     up mapping the hash to ``pred_filepath``.
     """
     source_hash = hashfile(complex_filepath, hexdigest=True)
     cached_pred = memo.get(source_hash)
     reusable = cached_pred is not None and Path(cached_pred).exists()
     if not reusable:
         # Nothing usable cached: compute the prediction from scratch.
         simplifier(complex_filepath, pred_filepath)
     else:
         assert count_lines(complex_filepath) == count_lines(cached_pred)
         # Reuse the earlier prediction instead of recomputing it.
         shutil.copyfile(cached_pred, pred_filepath)
     # Remember where the latest prediction for this input lives.
     memo[source_hash] = pred_filepath
Beispiel #5
0
def test_spec():
    """Check ``hashfile`` against a table of known digests.

    Each row is ``(sample_size, sample_threshhold, n, expected_hex)``:
    ``M(n)`` is written to ``.test_data``, the file is hashed with the given
    sampling parameters, and the hex-encoded digest must equal
    ``expected_hex``.  (``M`` is defined elsewhere; presumably it generates
    ``n`` bytes of test data.)
    """
    tests = [
        (16384, 131072, 0,      "00000000000000000000000000000000"),
        (16384, 131072, 1,      "01659e2ec0f3c75bf39e43a41adb5d4f"),
        (16384, 131072, 127,    "7f47671cc79d4374404b807249f3166e"),
        (16384, 131072, 128,    "800183e5dbea2e5199ef7c8ea963a463"),
        (16384, 131072, 4095,   "ff1f770d90d3773949d89880efa17e60"),
        (16384, 131072, 4096,   "802048c26d66de432dbfc71afca6705d"),
        (16384, 131072, 131072, "8080085a3d3af2cb4b3a957811cdf370"),
        (16384, 131073, 131072, "808008282d3f3b53e1fd132cc51fcc1d"),
        (16384, 131072, 500000, "a0c21e44a0ba3bddee802a9d1c5332ca"),
        (50,    131072, 300000, "e0a712edd8815c606344aed13c44adcf"),
    ]

    for sample_size, sample_threshhold, data_size, expected_hex in tests:
        try:
            with open('.test_data', 'wb') as f:
                f.write(M(data_size))
            digest = hashfile('.test_data',
                              sample_threshhold=sample_threshhold,
                              sample_size=sample_size)
            assert binascii.hexlify(digest) == expected_hex.encode()
        finally:
            # Remove the scratch file even when the assertion (or hashing)
            # fails, so a failing case does not leave '.test_data' behind.
            if os.path.exists('.test_data'):
                os.remove('.test_data')
Beispiel #6
0
def test_spec():
    """Verify ``hashfile`` digests for a fixed set of sizes and sampling settings.

    Rows are ``(sample_size, sample_threshhold, n, expected_hex)``.  For
    each row ``M(n)`` is written to a scratch file, hashed with those
    sampling parameters, and compared (hex-encoded) with ``expected_hex``.
    ``M`` comes from elsewhere in the project; presumably it produces ``n``
    bytes of test data.
    """
    data_path = '.test_data'
    tests = [(16384, 131072, 0, "00000000000000000000000000000000"),
             (16384, 131072, 1, "01659e2ec0f3c75bf39e43a41adb5d4f"),
             (16384, 131072, 127, "7f47671cc79d4374404b807249f3166e"),
             (16384, 131072, 128, "800183e5dbea2e5199ef7c8ea963a463"),
             (16384, 131072, 4095, "ff1f770d90d3773949d89880efa17e60"),
             (16384, 131072, 4096, "802048c26d66de432dbfc71afca6705d"),
             (16384, 131072, 131072, "8080085a3d3af2cb4b3a957811cdf370"),
             (16384, 131073, 131072, "808008282d3f3b53e1fd132cc51fcc1d"),
             (16384, 131072, 500000, "a0c21e44a0ba3bddee802a9d1c5332ca"),
             (50, 131072, 300000, "e0a712edd8815c606344aed13c44adcf")]

    for size, threshhold, length, expected in tests:
        try:
            with open(data_path, 'wb') as f:
                f.write(M(length))
            actual = binascii.hexlify(
                hashfile(data_path,
                         sample_threshhold=threshhold,
                         sample_size=size))
            assert actual == expected.encode()
        finally:
            # Always clean up the scratch file; previously a failing case
            # skipped the removal and left it in the working directory.
            if os.path.exists(data_path):
                os.remove(data_path)
Beispiel #7
0
    def fromfile(cls, path: str, url_prefix: str) -> "FileInfo":
        """
        Build a "FileInfo" from a file on disk, parsing metadata out of its name.

        The base name must contain ``name-v.v.v`` followed by an optional
        ``-`` and a non-empty remainder, which is stored as the platform.
        When the platform part starts with ``<language>.`` for one of
        ``all_languages``, that language is recorded as well.

        :param url_prefix: url prefix, for example: /static
        :param path: path to file (relative!)
        :return: "FileInfo"
        :raises ValueError: if the file name does not match the format
        """
        file_name = basename(path)

        # Example: libivm-1.0.2-src.zip driver-1.0.0.zip
        parsed = findall(
            r"(?P<name>\w+)-(?P<version>\d+\.\d+\.\d+)-?(?P<platform>.+)",
            file_name)
        if not parsed:
            raise ValueError("File name " + file_name +
                             " must be name-v.v.v-platform")
        name, version, platform = parsed[0]

        # First language whose "<code>." prefix matches (en.pdf, ru.exe, ...),
        # or None when no language matches.
        file_language = next(
            (candidate for candidate in all_languages
             if platform.startswith(candidate + ".")),
            None)

        # Keep only the last three path components, e.g.
        # /foo/bar/spam/download/EyePointS1/firmware
        # becomes download/EyePointS1/firmware
        tail_parts = path.split(sep)[-3:]
        path_short = join_path(*tail_parts)

        modified_on = datetime.fromtimestamp(getmtime(path)).date()
        size_bytes = getsize(path)
        # NOTE(review): `urllib.quote` only exists on Python 2's urllib; on
        # Python 3 the function is `urllib.parse.quote` - confirm what
        # `urllib` is bound to in this module.
        url = urllib.quote(join_path(url_prefix, path_short))
        digest = imohash.hashfile(path)

        return FileInfo(
            version,
            modified_on,
            size_bytes,
            url,
            file_name,
            path,
            digest,
            platform,
            name,
            language=file_language,
        )
Beispiel #8
0
 def get_metadata(self):
     """Populate ``self.size`` (bytes on disk) and ``self.hashe`` (hex digest) from ``self.path``."""
     target = self.path
     self.size = os.path.getsize(target)
     self.hashe = hashfile(target, hexdigest=True)
Beispiel #9
0
def get_file_hash(filepath):
    """Return the hex digest that ``hashfile`` computes for ``filepath``."""
    hex_digest = hashfile(filepath, hexdigest=True)
    return hex_digest
Beispiel #10
0
def parse_obj(progress, obj):
    """Build a resource record for the file at ``obj['raw']`` and classify it.

    The record gets a name/extension from the file name, an ``_id`` equal to
    the file's hex hash, and a sorted ``tag`` list built from the CJK and
    alphanumeric runs in ``"<parent>/<name>"`` (converted with OpenCC
    ``s2twp``).  Keys of the incoming ``obj`` are spread last, so they take
    precedence over these computed fields.

    :param progress: object exposing ``cancel`` and ``sig_log.emit(str)``.
    :param obj: dict with at least ``'raw'`` (file path) and ``'parent'``.
    :return: ``None`` when ``progress.cancel`` is set; otherwise the record
        dict with ``'error'`` set to one of:
        ``'collide'`` (``_id`` already in ``RESOURCE``; stored tags are
        merged and updated), ``'command'`` (ffprobe failed or reported an
        unusable frame rate), ``'size'``, ``'duration'`` (filtered out), or
        ``'null'`` (accepted).
    """
    # Local import keeps this block self-contained; Fraction parses the
    # "num/den" frame-rate string without resorting to eval().
    from fractions import Fraction

    if progress.cancel:
        progress.sig_log.emit('取消中...')
        return None

    progress.sig_log.emit(obj['raw'])

    filename = os.path.basename(obj['raw'])
    name, ext = os.path.splitext(filename)
    ext = ext.strip('.')
    key = hashfile(obj['raw'], hexdigest=True)

    cc = OpenCC('s2twp')
    # Both alternatives use '+', so every match is non-empty.
    tokens = set(re.findall(u'[\u4e00-\u9fff]+|[a-zA-Z0-9]+',
                            '{}/{}'.format(obj['parent'], name)))
    tag = [cc.convert(t) for t in tokens]

    obj = {
        'name': name,
        'type': ext,
        '_id': key,
        'raw': obj['raw'],
        'tag': sorted(tag),
        **obj
    }

    col = RESOURCE.find_one({'_id': key})
    if col is not None:
        # Same content hash already stored: merge tags into the stored
        # document and report the collision.
        obj['tag'].extend(col['tag'])
        obj['tag'] = sorted(set(obj['tag']))
        RESOURCE.update_one({'_id': key}, {'$set': {'tag': obj['tag']}})
        obj['error'] = 'collide'
        return obj

    if obj['type'] in VIDEO_TYPES:
        try:
            cmd_meta = subprocess.check_output(
                [
                    'ffprobe',
                    '-v', 'quiet',
                    '-print_format', 'json',
                    '-show_format',
                    '-show_entries',
                    'stream=r_frame_rate,width,height',
                    obj['raw']
                ]
            )
        except subprocess.CalledProcessError as e:
            print(obj['raw'])
            print(e.output)
            obj['error'] = 'command'
            return obj

        meta = json.loads(cmd_meta.decode('utf-8'))
        stream = meta['streams'][0]

        try:
            # r_frame_rate is a "num/den" string such as "30000/1001".
            # Parsing it with Fraction replaces the previous eval() call:
            # no code execution on tool output, and "0/0" still raises
            # ZeroDivisionError.  Malformed text raises ValueError.
            frame_rate = float(Fraction(stream['r_frame_rate']))
        except (ZeroDivisionError, ValueError):
            obj['error'] = 'command'
            return obj

        obj.update({
            'duration': float(meta['format']['duration']),
            'fps': int(round(frame_rate)),
            'width': stream['width'],
            'height': stream['height'],
        })

    # NOTE(review): for a type outside VIDEO_TYPES the keys below exist only
    # if the incoming obj supplied them; otherwise this raises KeyError.
    # Confirm whether non-video inputs are expected here.
    if (obj['width'] + obj['height']) / 2.0 < FILTER_SIZE:
        obj['error'] = 'size'
    elif obj['duration'] < FILTER_DURATION[0] or obj['duration'] > FILTER_DURATION[1]:
        obj['error'] = 'duration'
    else:
        obj['error'] = 'null'
    return obj
def get_file_hash(filepath):
    """Return the md5 hex digest of the file's content hash joined with its base name.

    The hex digest from ``hashfile`` and ``Path(filepath).name`` are
    concatenated, encoded, and fed to md5.
    """
    content_digest = hashfile(filepath, hexdigest=True)
    base_name = Path(filepath).name
    combined = (content_digest + base_name).encode()
    return md5(combined).hexdigest()
Beispiel #12
0
        Path(localErrFolder + date + i + 'remoterename.err').touch()
        logging.info(
            'Unable to rename tmp file: {}. Probably already locked'.format(k))
        logging.debug(e)
        logging.debug('Error in line: {}'.format(sys.exc_info()[-1].tb_lineno))

    # Download  renamed file to tmp local folder
    if flag1:
        logging.info(
            ' Download {}   renamed file {} to tmp local folder '.format(
                searchString, k))
        try:
            sftp.get(remoteFolder + k, localTmpFolder + k)

            h1 = hashfileobject(sftp.file(remoteFolder + k), hexdigest=True)
            h2 = hashfile(localTmpFolder + k, hexdigest=True)
            logging.debug(' Hash created for file: {}'.format(k))

            if h1 == h2:
                flag2 = True
                logging.debug(
                    ' Hash is ok, downloaded  renamed file to tmp local folder finished {}'
                    .format(k))
            else:
                logging.info(' Hash is not ok: {}'.format(k))
                Path(localErrFolder + date + i + 'hashisnotok.err').touch()

        except Exception as e:
            Path(localErrFolder + date + i + 'unabletodowload.err').touch()
            logging.info(' Unable to download file: {}'.format(k))
            logging.debug(e)
Beispiel #13
0
def imohash_dir(directory="M:\\Good Backgrounds"):
    """Hash every regular file directly inside ``directory`` with imohash.

    :param directory: directory to scan (non-recursive).  Defaults to the
        path that was previously hard-coded.
    :return: hex digest of the last file hashed, or ``None`` when the
        directory contains no regular files.
    """
    # Initialise up front: an empty directory used to hit an unbound local.
    result = None
    # The context manager closes the scandir iterator deterministically.
    with os.scandir(directory) as entries:
        for entry in entries:
            # Sub-directories cannot be opened as files; skip them.
            if not entry.is_file():
                continue
            result = imohash.hashfile(entry.path, hexdigest=True)
    return result