def file_is_available(fso_path: str, interval: int = 1) -> bool:
    """Return True if the file's hash is unchanged after `interval` seconds, i.e. nothing is still writing to it."""
    try:
        prelim_hash: str = hashfile(fso_path)
        time.sleep(interval)
        secondary_hash: str = hashfile(fso_path)
        if prelim_hash == secondary_hash:
            return True
    except Exception:
        pass
    return False
def dir_is_available(fso_path: str, interval: float = 0.5) -> bool:
    """Return True if the hashes of all files under the directory are unchanged after `interval` seconds."""
    try:
        prelim_hash: list = [hashfile(file) for file in walk_dir(fso_path)]
        time.sleep(interval)
        secondary_hash: list = [hashfile(file) for file in walk_dir(fso_path)]
        if prelim_hash == secondary_hash:
            return True
    except Exception:
        pass
    return False
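A minimal usage sketch for the two availability helpers above, assuming `hashfile` comes from imohash and that `walk_dir` yields file paths; the polling loop and the `wait_for_file` name are illustrative additions, not part of the original code.

import time
from imohash import hashfile  # file_is_available() above relies on this

def wait_for_file(path: str, attempts: int = 10, interval: float = 1.0) -> bool:
    # Poll until two hashes taken `interval` seconds apart match (the file has
    # stopped changing), or give up after `attempts` tries.
    for _ in range(attempts):
        if file_is_available(path, interval=interval):
            return True
        time.sleep(interval)
    return False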
def quick_hash(path):
    """Content hash for a file or a whole directory tree, built on imohash sampling."""
    params = {
        "hexdigest": True,
        "sample_size": 4 * 1024**2,         # 4 MB
        "sample_threshhold": 16 * 1024**2,  # 16 MB
    }
    path = os.path.expanduser(path)
    if pathlib.Path(path).is_dir():
        # Hash every regular file (sorted for determinism) and fold the
        # (relative path, hash) pairs into one combined digest.
        files = list(sorted(pathlib.Path(path).glob('**/*')))
        res = pyfra.remote._hash_obs(
            *[(str(f.relative_to(pathlib.Path(path))),
               imohash.hashfile(str(f.resolve()), **params))
              for f in files if f.is_file()])[:32]
        return res
    return imohash.hashfile(path, **params)
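The directory branch above depends on the pyfra-internal `pyfra.remote._hash_obs`; a reduced sketch of just the imohash call with the same enlarged sampling parameters (the file name is a placeholder):

import imohash

digest = imohash.hashfile(
    "some_large_file.bin",             # placeholder path
    hexdigest=True,
    sample_size=4 * 1024**2,           # read 4 MB samples instead of imohash's 16 KB default
    sample_threshhold=16 * 1024**2,    # files smaller than 16 MB are hashed in full
)
print(digest)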
def wrapped(complex_filepath, pred_filepath):
    complex_filehash = hashfile(complex_filepath, hexdigest=True)
    previous_pred_filepath = memo.get(complex_filehash)
    if previous_pred_filepath is not None and Path(previous_pred_filepath).exists():
        assert count_lines(complex_filepath) == count_lines(previous_pred_filepath)
        # Reuse previous prediction
        shutil.copyfile(previous_pred_filepath, pred_filepath)
    else:
        simplifier(complex_filepath, pred_filepath)
        # Save prediction
        memo[complex_filehash] = pred_filepath
def test_spec():
    tests = [
        (16384, 131072, 0, "00000000000000000000000000000000"),
        (16384, 131072, 1, "01659e2ec0f3c75bf39e43a41adb5d4f"),
        (16384, 131072, 127, "7f47671cc79d4374404b807249f3166e"),
        (16384, 131072, 128, "800183e5dbea2e5199ef7c8ea963a463"),
        (16384, 131072, 4095, "ff1f770d90d3773949d89880efa17e60"),
        (16384, 131072, 4096, "802048c26d66de432dbfc71afca6705d"),
        (16384, 131072, 131072, "8080085a3d3af2cb4b3a957811cdf370"),
        (16384, 131073, 131072, "808008282d3f3b53e1fd132cc51fcc1d"),
        (16384, 131072, 500000, "a0c21e44a0ba3bddee802a9d1c5332ca"),
        (50, 131072, 300000, "e0a712edd8815c606344aed13c44adcf"),
    ]
    for test in tests:
        # Each tuple: (sample_size, sample_threshhold, bytes written via M(), expected hex digest).
        with open('.test_data', 'wb') as f:
            f.write(M(test[2]))
        assert binascii.hexlify(
            hashfile('.test_data', sample_threshhold=test[1], sample_size=test[0])
        ) == test[3].encode()
    os.remove('.test_data')
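The spec vectors above pin down imohash's behaviour at its default parameters (16 KB samples, 128 KB threshold): files at or below the threshold are hashed in full, larger files only have sampled regions read, and the file size is varint-encoded into the digest prefix. A small sketch using only the public API; the file name and sizes are arbitrary choices for illustration.

import os
from imohash import hashfile

for size in (0, 1, 128, 500000):
    with open('.demo_data', 'wb') as f:
        f.write(b'\x00' * size)
    # The digest prefix varint-encodes the file size, which is why the 0-byte
    # vector above is all zeros and the 1-byte vector starts with "01".
    print(size, hashfile('.demo_data', hexdigest=True))
os.remove('.demo_data')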
def fromfile(cls, path: str, url_prefix: str) -> "FileInfo":
    """
    File name must be in the format name-v.v.v-platform.

    :param url_prefix: URL prefix, for example: /static
    :param path: path to file (relative!)
    :return: "FileInfo"
    """
    file_name = basename(path)
    # Examples: libivm-1.0.2-src.zip, driver-1.0.0.zip
    format_matches = findall(
        r"(?P<name>\w+)-(?P<version>\d+\.\d+\.\d+)-?(?P<platform>.+)", file_name)
    if not format_matches:
        raise ValueError("File name " + file_name + " must be name-v.v.v-platform")
    name, version, platform = format_matches[0]
    file_language = None
    for language in all_languages:
        # Pattern: en.pdf, ru.exe, etc.
        if platform.startswith(language + "."):
            file_language = language
            break
    # Convert /foo/bar/spam/download/EyePointS1/firmware to download/EyePointS1/firmware
    path_short = join_path(*path.split(sep)[-3:])
    return FileInfo(
        version,
        datetime.fromtimestamp(getmtime(path)).date(),
        getsize(path),
        urllib.parse.quote(join_path(url_prefix, path_short)),
        file_name,
        path,
        imohash.hashfile(path),
        platform,
        name,
        language=file_language,
    )
def get_metadata(self):
    self.size = os.path.getsize(self.path)
    self.hashe = hashfile(self.path, hexdigest=True)
def get_file_hash(filepath):
    return hashfile(filepath, hexdigest=True)
def parse_obj(progress, obj):
    if progress.cancel:
        progress.sig_log.emit('Cancelling...')
        return None
    progress.sig_log.emit(obj['raw'])

    path, filename = os.path.split(obj['raw'])
    name, ext = os.path.splitext(filename)
    ext = ext.strip('.')

    # Content-based key so the same file is never indexed twice.
    key = hashfile(obj['raw'], hexdigest=True)

    # Build tags from the parent folder and file name (Simplified -> Traditional Chinese).
    cc = OpenCC('s2twp')
    tag = '{}/{}'.format(obj['parent'], name)
    tag = re.findall(u'[\u4e00-\u9fff]+|[a-zA-Z0-9]+', tag)
    tag = list(set(tag))
    tag = [cc.convert(t) for t in tag if t != '']

    obj = {
        'name': name,
        'type': ext,
        '_id': key,
        'raw': obj['raw'],
        'tag': sorted(tag),
        **obj
    }

    col = RESOURCE.find_one({'_id': key})
    if col is not None:
        # Already indexed: merge tags and flag the collision.
        obj['tag'].extend(col['tag'])
        obj['tag'] = sorted(list(set(obj['tag'])))
        RESOURCE.update_one({'_id': key}, {'$set': {'tag': obj['tag']}})
        obj['error'] = 'collide'
        return obj
    else:
        if obj['type'] in VIDEO_TYPES:
            # Probe video metadata with ffprobe.
            try:
                cmd_meta = subprocess.check_output([
                    'ffprobe', '-v', 'quiet', '-print_format', 'json',
                    '-show_format', '-show_entries',
                    'stream=r_frame_rate,width,height', obj['raw']
                ])
            except subprocess.CalledProcessError as e:
                print(obj['raw'])
                print(e.output)
                obj['error'] = 'command'
                return obj
            meta = json.loads(cmd_meta.decode('utf-8'))
            try:
                # r_frame_rate is a fraction string such as "30000/1001".
                frame_rate = eval(meta['streams'][0]['r_frame_rate'])
            except ZeroDivisionError:
                obj['error'] = 'command'
                return obj
            obj.update({
                'duration': float(meta['format']['duration']),
                'fps': int(round(frame_rate)),
                'width': meta['streams'][0]['width'],
                'height': meta['streams'][0]['height'],
            })
            if (obj['width'] + obj['height']) / 2.0 < FILTER_SIZE:
                obj['error'] = 'size'
                return obj
            elif obj['duration'] < FILTER_DURATION[0] or obj['duration'] > FILTER_DURATION[1]:
                obj['error'] = 'duration'
                return obj
            else:
                obj['error'] = 'null'
                return obj
def get_file_hash(filepath):
    # Combine the sampled content hash with the file name so renamed copies get distinct keys.
    return md5((hashfile(filepath, hexdigest=True) + Path(filepath).name).encode()).hexdigest()
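A possible follow-up showing how a key like the one above might be used; `index_directory` is an illustrative addition, not part of the original project, and assumes a flat directory of files.

from pathlib import Path

def index_directory(directory: str) -> dict:
    # Map each file's combined content+name key to its path. Files with the
    # same bytes and the same name collapse to one entry; renamed copies do not.
    index = {}
    for path in Path(directory).iterdir():
        if path.is_file():
            index[get_file_hash(str(path))] = str(path)
    return index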
    Path(localErrFolder + date + i + 'remoterename.err').touch()
    logging.info('Unable to rename tmp file: {}. Probably already locked'.format(k))
    logging.debug(e)
    logging.debug('Error in line: {}'.format(sys.exc_info()[-1].tb_lineno))

# Download renamed file to tmp local folder
if flag1:
    logging.info(' Download {} renamed file {} to tmp local folder '.format(searchString, k))
    try:
        sftp.get(remoteFolder + k, localTmpFolder + k)
        # Compare the hash of the remote file object with the downloaded local copy.
        h1 = hashfileobject(sftp.file(remoteFolder + k), hexdigest=True)
        h2 = hashfile(localTmpFolder + k, hexdigest=True)
        logging.debug(' Hash created for file: {}'.format(k))
        if h1 == h2:
            flag2 = True
            logging.debug(' Hash is ok, downloaded renamed file to tmp local folder finished {}'.format(k))
        else:
            logging.info(' Hash is not ok: {}'.format(k))
            Path(localErrFolder + date + i + 'hashisnotok.err').touch()
    except Exception as e:
        Path(localErrFolder + date + i + 'unabletodowload.err').touch()
        logging.info(' Unable to download file: {}'.format(k))
        logging.debug(e)
def imohash_dir():
    # Collect a hash for every file in the folder; the original returned inside
    # the loop, so only the first directory entry was ever hashed.
    results = {}
    for entry in os.scandir("M:\\Good Backgrounds"):
        if entry.is_file():
            results[entry.name] = imohash.hashfile(entry.path, hexdigest=True)
    return results