def check_for_match(new_case, possibilities):
    """Find the existing case name that most closely resembles a new one.

    This is a variation of get_closest_match_index used in juriscraper.
    The incoming name and every candidate are passed through
    normalize_phrase before being compared with difflib, so the value
    handed back is the normalized form of the winning candidate.

    :param new_case: The importing case name
    :param possibilities: The cases already in the system with the same
        citation; each one is passed to normalize_phrase
    :return: The single best normalized candidate, or None when no
        candidate reaches the 0.7 similarity cutoff.
    """
    target = normalize_phrase(new_case)
    candidates = [normalize_phrase(name) for name in possibilities]
    closest = difflib.get_close_matches(target, candidates, n=1, cutoff=0.7)
    if not closest:
        # No good matches.
        return None
    return closest[0]
def test_normalize_phrase(self):
    """Check that case titles come out in their expected normalized form."""
    # Each entry pairs a raw case title with the expected result of
    # running it through clean_string, harmonize and normalize_phrase.
    expectations = (
        ("Commissioner v. Palin", "palin"),
        ("Commr v. Palin", "palin"),
        ("Comm'r v. Palin", "palin"),
        (
            "United States v. Learned Hand et. al.",
            "unitedstateslearnedhand",
        ),
        ("Baker, Plaintiff v. Palin, Defendant", "bakerpalin"),
    )
    for raw_title, expected in expectations:
        cleaned = clean_string(raw_title)
        harmonized = harmonize(cleaned)
        self.assertEqual(normalize_phrase(harmonized), expected)