コード例 #1
0
def main():
    """Command-line entry point: report statistics per image signature.

    Parses the command line, opens the database named by the common
    ``db`` option, prints how many classified image pairs exist and how
    many of them are marked as similar, prints a table header, and then
    calls ``report`` once for each hard-coded (signature, similarity)
    pair.
    """
    parser = argparse.ArgumentParser(
        description='Reports statistics about the different image signatures.')
    parser.add_argument('roots', metavar='DIR', nargs='*', default=["."],
                        help="a directory to scan for duplicate files " +
                        "(if not given '.' will be used)")
    add_common_command_line_arguments(parser)
    # NOTE(review): this option is parsed but never read below; the
    # similarity used for each signature comes from the hard-coded table.
    parser.add_argument('-s', '--min-similarity',
                        default=0.9,
                        help='require at least this image similarity')

    args = parser.parse_args()
    # presumably ``db`` is declared by add_common_command_line_arguments
    # as a one-element list -- verify against that helper.
    repo = dfr.db.Database(args.db[0])

    known = repo.imagefeedback.find()
    positive = [x for x in known if x.aresimilar == 1]

    # With no classified pairs the percentage is undefined; report 0.0
    # instead of failing with ZeroDivisionError.
    total = len(known)
    percentage = (100.0 * len(positive)) / total if total else 0.0

    # Format the string *inside* the parentheses so the statement behaves
    # the same whether ``print`` is a statement (Python 2) or a function
    # (Python 3, where ``print(...) % (...)`` would raise TypeError).
    print(("There are %d classified image pairs. %d (%.1f%%) " +
           "are classifized as similar.") %
          (total, len(positive), percentage))
    print(("%10s | %12s | %10s | %12s | %10s | %10s | %10s " +
           "| %10s | %10s | %10s") %
          ("Signature", "Description", "Detected", "Classified", "TP",
           "FP", "FN", "Precision", "Recall", "F-Measure"))

    # Each entry pairs a signature id with the similarity value handed to
    # the finder's find() for that signature.
    for sig, sim in [(1, 0.95), (2, 0.999), (3, 0.9),
                     (4, 0.95), (5, 0.8)]:
        finder = ImageSimilarFinder(repo, args.roots, sig, 0)
        pairs = list(finder.find(sim))
        report(sig, pairs, known)
コード例 #2
0
def main():
    """Command-line entry point: find files with equal or similar content.

    Parses the command line, opens the database named by the common
    ``db`` option, picks a finder and a resolver according to ``--what``
    and ``--output-type``, and passes every item the finder yields to
    ``resolver.resolve`` before calling ``resolver.finished``.

    Option values are validated by argparse: an unknown ``--what`` or
    ``--output-type``, a non-numeric ``--min-similarity`` or an
    out-of-range ``--image-signature`` ends the program with a usage
    error instead of silently selecting the default behaviour.
    """
    parser = argparse.ArgumentParser(
        description='Find files with equal or similar content.')
    parser.add_argument('roots', metavar='DIR', nargs='*', default=["."],
                        help="a directory to scan for duplicate files " +
                        "(if not given '.' will be used)")
    add_common_command_line_arguments(parser)
    parser.add_argument('-t', '--output-type', default="interactive",
                        choices=("interactive", "csv", "json"),
                        help='determine the output type. Valid values are ' +
                        '"interactive", "csv" and "json". ' +
                        'Default is "interactive".')
    parser.add_argument('-o', '--output', default="-",
                        help='The output file name. "-" stands for stdout. ' +
                        'Default is "-".')
    parser.add_argument('-w', '--what', default="bitequal",
                        choices=("bitequal", "truncated", "image"),
                        help='determine what is searched. Valid values are ' +
                        '"bitequal" for files which are equal ' +
                        'bit-wise, "truncated" for files which are ' +
                        'truncated (the larger files consists of the ' +
                        'smaller file and some extra content ' +
                        'afterwards) and "image" to search for similar ' +
                        'images. Default is "bitequal".')
    parser.add_argument('-s', '--min-similarity',
                        type=float, default=0.9,
                        help='require at least this image similarity. ' +
                        'Default is "0.9".')
    parser.add_argument('-S', '--image-signature',
                        type=int, choices=(1, 2, 3, 4, 5), default=3,
                        help='Image signature to use. Valid is 1, 2, 3, 4 ' +
                        'and 5. Default is "3".')
    parser.add_argument('-n', '--dry-run', action="store_true", dest='dry_run',
                        help='do not delete any files')

    args = parser.parse_args()
    # presumably ``db`` is declared by add_common_command_line_arguments
    # as a one-element list -- verify against that helper.
    repo = dfr.db.Database(args.db[0])

    if args.what == "image":
        # JSON output works on the bucket finder; every other output type
        # uses the plain finder. Only the finder actually used is built.
        if args.output_type == "json":
            finder = ImageSimilarBucketFinder(repo, args.roots,
                                              args.image_signature)
            resolver = JsonImageSimilarResolver(args.output)
        else:
            finder = ImageSimilarFinder(repo, args.roots,
                                        args.image_signature)
            if args.output_type == "csv":
                resolver = CsvImageSimilarResolver(args.output)
            else:
                resolver = GuiImageSimilarResolver(args.dry_run)
        found_items = finder.find(args.min_similarity)
    elif args.what == "truncated":
        # There is no JSON branch for truncated files: "json" is handled
        # like "interactive" here.
        if args.output_type == "csv":
            resolver = CsvBitTruncatedResolver(args.output)
        else:
            resolver = InteractiveBitTruncatedResolver(args.dry_run)
        finder = BitTruncatedFinder(repo, args.roots)
        found_items = finder.find()
    else:
        # args.what == "bitequal" (guaranteed by ``choices`` above).
        if args.output_type == "json":
            finder = BitEqualBucketFinder(repo, args.roots)
            resolver = JsonBitEqualResolver(args.output)
        else:
            finder = BitEqualFinder(repo, args.roots)
            if args.output_type == "csv":
                resolver = CsvBitEqualResolver(args.output)
            else:
                resolver = InteractiveBitEqualResolver(args.dry_run)
        found_items = finder.find()

    for item in found_items:
        resolver.resolve(item)
    resolver.finished()