def main():
    """Print detection statistics for each image signature.

    Parses the command line, opens the database named by ``args.db[0]``,
    loads the image feedback records and prints how many of them are
    marked as similar. It then prints a table header and, for every
    hard-coded (signature, similarity) pair, runs an
    ``ImageSimilarFinder`` over the given roots and hands the found
    pairs to ``report`` together with the feedback records.
    """
    parser = argparse.ArgumentParser(
        description='Reports statistics about the different image signatures.')
    parser.add_argument('roots', metavar='DIR', nargs='*', default=["."],
                        help="a directory to scan for duplicate files "
                        + "(if not given '.' will be used)")
    # presumably this registers the ``db`` option read below -- TODO confirm
    add_common_command_line_arguments(parser)
    # NOTE(review): this option is accepted but never read in this
    # function; the similarity thresholds are hard-coded per signature
    # in the loop below.
    parser.add_argument('-s', '--min-similarity', default=0.9,
                        help='require at least this image similarity')
    args = parser.parse_args()

    repo = dfr.db.Database(args.db[0])
    known = repo.imagefeedback.find()
    positive = [x for x in known if x.aresimilar == 1]

    # Guard against an empty feedback table: the percentage is undefined
    # there, so report 0.0 instead of raising ZeroDivisionError.
    total = len(known)
    percentage = (100.0 * len(positive)) / total if total else 0.0

    # ``print(fmt % args)`` behaves the same on Python 2 and Python 3,
    # unlike ``print (fmt) % args`` which only works as a Python 2
    # print statement.
    print(("There are %d classified image pairs. %d (%.1f%%) "
           + "are classified as similar.")
          % (total, len(positive), percentage))

    print(("%10s | %12s | %10s | %12s | %10s | %10s | %10s "
           + "| %10s | %10s | %10s")
          % ("Signature", "Description", "Detected", "Classified",
             "TP", "FP", "FN", "Precision", "Recall", "F-Measure"))

    # Each signature is evaluated at its own fixed similarity threshold.
    for sig, sim in [(1, 0.95), (2, 0.999), (3, 0.9), (4, 0.95), (5, 0.8)]:
        finder = ImageSimilarFinder(repo, args.roots, sig, 0)
        pairs = list(finder.find(sim))
        report(sig, pairs, known)
def main():
    """Find files with equal or similar content and resolve each hit.

    Parses the command line, opens the database named by ``args.db[0]``
    and, depending on ``--what`` and ``--output-type``, picks a finder
    and a resolver. Every item yielded by the finder is passed to
    ``resolver.resolve`` and ``resolver.finished`` is called at the end.

    Selection rules, as implemented below:

    * ``--what image``: JSON output uses ``ImageSimilarBucketFinder``;
      CSV and any other output type use ``ImageSimilarFinder``.
    * ``--what truncated``: CSV output is supported; any other output
      type uses the interactive resolver.
    * any other ``--what`` value is handled as ``bitequal``: JSON output
      uses ``BitEqualBucketFinder``; CSV and any other output type use
      ``BitEqualFinder``.
    """
    parser = argparse.ArgumentParser(
        description='Find files with equal or similar content.')
    parser.add_argument('roots', metavar='DIR', nargs='*', default=["."],
                        help="a directory to scan for duplicate files "
                        + "(if not given '.' will be used)")
    # presumably this registers the ``db`` option read below -- TODO confirm
    add_common_command_line_arguments(parser)
    parser.add_argument('-t', '--output-type', default="interactive",
                        help='determine the output type. Valid values are '
                        + '"interactive", "csv" and "json". '
                        + 'Default is "interactive".')
    parser.add_argument('-o', '--output', default="-",
                        help='The output file name. "-" stands for stdout. '
                        + 'Default is "-".')
    parser.add_argument('-w', '--what', default="bitequal",
                        help='determine what is searched. Valid values are '
                        + '"bitequal" for files which are equal '
                        + 'bit-wise, "truncated" for files which are '
                        + 'truncated (the larger files consists of the '
                        + 'smaller file and some extra content '
                        + 'afterwards) and "image" to search for similar '
                        + 'images. Default is "bitequal".')
    # ``type=`` lets argparse reject non-numeric input with a usage error
    # before the database is opened, instead of a ValueError traceback
    # later on.
    parser.add_argument('-s', '--min-similarity', default=0.9, type=float,
                        help='require at least this image similarity. '
                        + 'Default is "0.9".')
    parser.add_argument('-S', '--image-signature', default=3, type=int,
                        help='Image signature to use. Valid is 1, 2, 3, 4 '
                        + 'and 5. Default is "3".')
    parser.add_argument('-n', '--dry-run', action="store_true",
                        dest='dry_run', help='do not delete any files')
    args = parser.parse_args()

    repo = dfr.db.Database(args.db[0])

    # Only the finder that is actually used gets constructed; the JSON
    # paths no longer build a plain finder just to throw it away.
    if args.what == "image":
        if args.output_type == "json":
            resolver = JsonImageSimilarResolver(args.output)
            finder = ImageSimilarBucketFinder(repo, args.roots,
                                              args.image_signature)
        else:
            finder = ImageSimilarFinder(repo, args.roots,
                                        args.image_signature)
            if args.output_type == "csv":
                resolver = CsvImageSimilarResolver(args.output)
            else:
                resolver = GuiImageSimilarResolver(args.dry_run)
        found_items = finder.find(args.min_similarity)
    elif args.what == "truncated":
        if args.output_type == "csv":
            resolver = CsvBitTruncatedResolver(args.output)
        else:
            resolver = InteractiveBitTruncatedResolver(args.dry_run)
        finder = BitTruncatedFinder(repo, args.roots)
        found_items = finder.find()
    else:
        if args.output_type == "json":
            resolver = JsonBitEqualResolver(args.output)
            finder = BitEqualBucketFinder(repo, args.roots)
        else:
            finder = BitEqualFinder(repo, args.roots)
            if args.output_type == "csv":
                resolver = CsvBitEqualResolver(args.output)
            else:
                resolver = InteractiveBitEqualResolver(args.dry_run)
        found_items = finder.find()

    for item in found_items:
        resolver.resolve(item)
    resolver.finished()