def compare():
    """Handle requests for /compare via POST.

    Reads the uploaded ``file1`` and ``file2``, decodes both as UTF-8, finds
    their matches with the algorithm named in the ``algorithm`` form field
    (``lines``, ``sentences`` or ``substrings``), and renders ``compare.html``
    with both texts highlighted.

    Calls ``abort(400, ...)`` when a file is missing or cannot be decoded,
    when ``algorithm`` is missing or unrecognised, or when ``length`` (needed
    only for ``substrings``) is missing, not an integer, or not positive.
    """
    # Read files. ``.get`` is used so that an absent form field goes through
    # the same explicit "missing file" abort as an empty upload.
    upload1 = request.files.get("file1")
    upload2 = request.files.get("file2")
    if not upload1 or not upload2:
        abort(400, "missing file")
    try:
        file1 = upload1.read().decode("utf-8")
        file2 = upload2.read().decode("utf-8")
    except Exception:
        # Any failure to read or decode an upload is reported as a bad request.
        abort(400, "invalid file")

    # Compare files
    algorithm = request.form.get("algorithm")
    if not algorithm:
        abort(400, "missing algorithm")
    elif algorithm == "lines":
        # Anchor each match so that it is matched as a whole line.
        regexes = [f"^{re.escape(match)}$" for match in lines(file1, file2)]
    elif algorithm == "sentences":
        regexes = [re.escape(match) for match in sentences(file1, file2)]
    elif algorithm == "substrings":
        raw_length = request.form.get("length")
        if not raw_length:
            abort(400, "missing length")
        try:
            length = int(raw_length)
        except ValueError:
            # A non-numeric length is a client error, not a server crash.
            abort(400, "invalid length")
        if length <= 0:
            abort(400, "invalid length")
        regexes = [re.escape(match)
                   for match in substrings(file1, file2, length)]
    else:
        abort(400, "invalid algorithm")

    # Highlight files
    highlights1 = highlight(file1, regexes)
    highlights2 = highlight(file2, regexes)

    # Output comparison
    return render_template("compare.html", file1=highlights1, file2=highlights2)
def main():
    """Compare two files from the command line and print what they share.

    Exactly one of ``--lines``, ``--sentences`` or ``--substrings N`` must be
    given, followed by the two file paths. Exits with an error message if
    either file cannot be read. Matches are printed one per line, longest
    first, with ``\\n`` and ``\\r`` shown as escape sequences.
    """
    # Build the command-line interface; one comparison mode is mandatory
    cli = argparse.ArgumentParser()
    mode = cli.add_mutually_exclusive_group(required=True)
    mode.add_argument("--lines", action="store_true", help="compare lines")
    mode.add_argument("--sentences", action="store_true",
                      help="compare sentences")
    mode.add_argument("--substrings", metavar="N", type=positive,
                      help="compare substrings of length N")
    cli.add_argument("FILE1", help="file to compare")
    cli.add_argument("FILE2", help="file to compare")
    options = cli.parse_args()

    # Read both files in order, stopping at the first one that fails
    contents = []
    for path in (options.FILE1, options.FILE2):
        try:
            with open(path, "r") as handle:
                contents.append(handle.read())
        except IOError:
            sys.exit(f"Could not read {path}")
    file1, file2 = contents

    # Run the selected comparison
    if options.lines:
        matches = lines(file1, file2)
    elif options.sentences:
        matches = sentences(file1, file2)
    elif options.substrings:
        matches = substrings(file1, file2, options.substrings)

    # Print matches from longest to shortest with line endings escaped
    ordered = sorted(matches, key=len, reverse=True)
    for found in ordered:
        escaped = found.replace("\n", "\\n").replace("\r", "\\r")
        print(escaped)
def classify_token(self, token):
    """Return the sum of ``self._difference`` entries for a token's substrings.

    Each item produced by ``substrings(token)`` is used as a key into
    ``self._difference``; the looked-up values are added together, starting
    from 0.
    """
    total = 0
    for piece in substrings(token):
        total = total + self._difference[piece]
    return total
def classify_token(self, token):
    """Total the ``self._difference`` values keyed by the token's substrings.

    ``substrings(token)`` supplies the keys; the result is the sum of the
    corresponding ``self._difference`` entries (0 when there are none).
    """
    pieces = substrings(token)
    return sum(self._difference[piece] for piece in pieces)