def get_analysis(sha256_or_file, upload, save, outfile):
    """
    Get the Koodous report of SHA256_OR_FILE.

    If the file has not been analyzed by Koodous, the file is just submitted
    (or not, according to the --upload option).
    """
    # NOTE(review): the docstring above is presumably shown as the click help
    # text, so developer-facing notes are kept in comments instead.
    #
    # sha256_or_file: path of a readable regular file (hashed with
    #     file_hash()), or any other string, which is passed to the API as
    #     the digest unchanged.
    # upload: when truthy, submit the local file if no analysis is returned.
    # save: when truthy, write the analysis as JSON.
    # outfile: open file object to write to (its ``name`` is logged); when
    #     falsy, '<wdir>/<sha256>.json' is created instead.
    ctx = click.get_current_context()
    api = ctx.meta.get('api')
    wdir = ctx.meta.get('wdir')

    is_file = (os.path.isfile(sha256_or_file)
               and os.access(sha256_or_file, os.R_OK))
    sha256 = sha256_or_file
    if is_file:
        sha256 = file_hash(sha256_or_file)
        logger.info('File %s SHA-256 digest = %s', sha256_or_file, sha256)

    logger.info('Getting analysis of %s', sha256)
    analysis = api.get_analysis(sha256)
    # NOTE(review): the raw value is echoed before the highlighted version
    # below; this looks like leftover debug output but is kept as-is.
    click.echo(analysis)

    if analysis:
        click.echo(pygmentize_json(analysis))
        if save:
            # Remember whether this function created the handle: only then
            # is it ours to close.
            opened_here = not outfile
            if opened_here:
                filepath = os.path.join(wdir, '{}.json'.format(sha256))
                outfile = io.open(filepath, 'wb')
            else:
                filepath = outfile.name
            logger.info('Saving analysis to %s', filepath)
            try:
                payload = json.dumps(analysis)
                # json produces text; a handle opened in binary mode (such
                # as the 'wb' one above) only accepts bytes on Python 3.
                if 'b' in str(getattr(outfile, 'mode', '')):
                    payload = payload.encode('utf-8')
                outfile.write(payload)
            finally:
                if opened_here:
                    outfile.close()
            logger.info('Saved to %s successfully', filepath)
    elif is_file:
        logger.warning('File not found on Koodous')
        if upload:
            logger.info('Uploading file for analysis')
            try:
                upload_result = api.upload(sha256_or_file)
                logger.info('File %s uploaded successfully', upload_result)
            except Exception as ex:
                # Best effort: a failed upload is reported, not raised.
                logger.error('Uploading %s failed: %s', sha256_or_file, ex)
def get_analysis(sha256_or_file, upload, save, outfile):
    """
    Get the Koodous report of SHA256_OR_FILE.

    If the file has not been analyzed by Koodous, the file is just submitted
    (or not, according to the --upload option).
    """
    # NOTE(review): the docstring above is presumably shown as the click help
    # text, so developer-facing notes are kept in comments instead.
    #
    # sha256_or_file: path of a readable regular file (hashed with
    #     file_hash()), or any other string, which is passed to the API as
    #     the digest unchanged.
    # upload: when truthy, submit the local file if no analysis is returned.
    # save: when truthy, write the analysis as JSON.
    # outfile: open file object to write to (its ``name`` is logged); when
    #     falsy, '<wdir>/<sha256>.json' is created instead.
    ctx = click.get_current_context()
    api = ctx.meta.get('api')
    wdir = ctx.meta.get('wdir')

    is_file = (os.path.isfile(sha256_or_file)
               and os.access(sha256_or_file, os.R_OK))
    sha256 = sha256_or_file
    if is_file:
        sha256 = file_hash(sha256_or_file)
        logger.info('File %s SHA-256 digest = %s', sha256_or_file, sha256)

    logger.info('Getting analysis of %s', sha256)
    analysis = api.get_analysis(sha256)
    # NOTE(review): the raw value is echoed before the highlighted version
    # below; this looks like leftover debug output but is kept as-is.
    click.echo(analysis)

    if analysis:
        click.echo(pygmentize_json(analysis))
        if save:
            # Remember whether this function created the handle: only then
            # is it ours to close.
            opened_here = not outfile
            if opened_here:
                filepath = os.path.join(wdir, '{}.json'.format(sha256))
                outfile = io.open(filepath, 'wb')
            else:
                filepath = outfile.name
            logger.info('Saving analysis to %s', filepath)
            try:
                payload = json.dumps(analysis)
                # json produces text; a handle opened in binary mode (such
                # as the 'wb' one above) only accepts bytes on Python 3.
                if 'b' in str(getattr(outfile, 'mode', '')):
                    payload = payload.encode('utf-8')
                outfile.write(payload)
            finally:
                if opened_here:
                    outfile.close()
            logger.info('Saved to %s successfully', filepath)
    elif is_file:
        logger.warning('File not found on Koodous')
        if upload:
            logger.info('Uploading file for analysis')
            try:
                upload_result = api.upload(sha256_or_file)
                logger.info('File %s uploaded successfully', upload_result)
            except Exception as ex:
                # Best effort: a failed upload is reported, not raised.
                logger.error('Uploading %s failed: %s', sha256_or_file, ex)
def test_file_hash(self):
    """Check utils.file_hash() with SHA-1 against '__init__.py'.

    The expected value is the SHA-1 digest of empty input, so the test
    relies on '__init__.py' (a relative path, resolved against the current
    working directory) being an empty file.
    """
    expected_sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'
    target = '__init__.py'
    one_mib = 1024 * 1024
    digest = utils.file_hash(target, one_mib, hash_type='sha1')
    self.assertEqual(digest, expected_sha1)
def _main():
    """Remove media files that are byte-identical to an earlier-seen file.

    Reads its configuration from the module-level ``args``:
    ``media_dirs`` (directories to scan), ``log_file`` (log path, or None to
    use 'image_organization.log' inside the first media directory),
    ``recursive`` (forwarded to list_subtree()) and ``dry_run`` (log what
    would be removed without deleting anything).

    Files are grouped by file_hash(); a file is only treated as a duplicate
    after a full content comparison against a file with the same hash. The
    first file seen with given content is the one that is kept.

    Raises:
        ValueError: if one of the media directories does not exist.
    """
    media_dirs = args.media_dirs
    for media_dir in media_dirs:
        if not os.path.exists(media_dir):
            raise ValueError(f"Media directory '{media_dir}' does not exist")

    log_file = args.log_file if args.log_file is not None else os.path.join(
        media_dirs[0], "image_organization.log")
    logger = create_logger(log_file, "duplicate_detector")
    logger.info("started new remove-duplicate session")

    print("Listing subtree...")
    all_files = []
    for media_dir in media_dirs:
        all_files.extend(list_subtree(media_dir, recursive=args.recursive))

    media_files = []
    for f in tqdm(all_files, desc="Filtering non-media files"):
        try:
            if is_media(f):
                media_files.append(f)
        except OSError:
            logger.warning(f"OS error while checking if '{f}' is a media file")

    # Maps a content hash to the kept files that produced it. Normally one
    # entry per hash; more only if distinct contents share a hash.
    hash_dict = dict()
    n_duplicates_found = n_duplicates_removed = 0
    for file_path in tqdm(media_files, desc="Removing duplicates"):
        try:
            h = file_hash(file_path)
        except PermissionError:
            logger.error(
                f"Permission error while computing the hash of '{file_path}'")
            continue

        candidates = hash_dict.setdefault(h, [])
        dup_file = None
        already_kept = False
        for candidate_file in candidates:
            # The same underlying file can be reached twice (overlapping
            # media dirs, symlinks, hard links). It trivially compares equal
            # to itself, and removing it would delete the only copy.
            if os.path.samefile(file_path, candidate_file):
                already_kept = True
                break
            # shallow=False forces a byte-by-byte comparison; the default may
            # declare files equal from their os.stat() signatures alone.
            if filecmp.cmp(file_path, candidate_file, shallow=False):
                dup_file = candidate_file
                break

        if already_kept:
            continue
        if dup_file is None:
            # New content (or a hash collision): keep it as a reference.
            candidates.append(file_path)
            continue

        n_duplicates_found += 1
        if args.dry_run:
            logger.info(
                f"Would remove {file_path} - duplication of {dup_file}")
            continue

        logger.info(f"Remove {file_path} - duplication of {dup_file}")
        try:
            os.remove(file_path)
        except OSError as err:
            # One undeletable file should not abort the whole session; it
            # shows up as a found-but-not-removed duplicate in the summary.
            logger.error(f"Failed to remove '{file_path}': {err}")
            continue
        n_duplicates_removed += 1

    print(
        f"Done - removed {n_duplicates_removed}/{n_duplicates_found} duplicates"
    )