def make_name_param(defendant: str, plaintiff: str = None) -> Tuple[str, int]:
    """Build a punctuation-free name string from the party names.

    The defendant (and the plaintiff, when one is given) is split on
    whitespace and every resulting token is passed through ``strip_punct``,
    because Solr doesn't like punctuation.

    Returns:
      - a tuple of the cleaned tokens joined by single spaces, and the number
        of tokens
    """
    # Defendant tokens always come first; the plaintiff is optional.
    parties = (defendant, plaintiff) if plaintiff else (defendant,)
    cleaned_tokens = [
        strip_punct(token) for party in parties for token in party.split()
    ]
    return " ".join(cleaned_tokens), len(cleaned_tokens)
def filter_by_matching_antecedent(
    opinion_candidates: Iterable[Opinion],
    antecedent_guess: Optional[str],
) -> Optional[Opinion]:
    """Pick the single candidate whose case name contains the antecedent guess.

    The guess is stripped of punctuation and then looked for as a substring
    of ``best_case_name(candidate.cluster)`` for every candidate.

    Returns:
      - the matching Opinion if exactly one distinct candidate matches
      - None if the guess is empty, nothing matches, or the match is ambiguous
    """
    if not antecedent_guess:
        return None

    needle = strip_punct(antecedent_guess)

    # A set collapses duplicate candidates so that the same opinion appearing
    # several times still counts as a single match.
    distinct_hits = {
        opinion
        for opinion in opinion_candidates
        if needle in best_case_name(opinion.cluster)
    }

    if len(distinct_hits) != 1:
        return None
    (sole_hit,) = distinct_hits
    return sole_hit
def get_court_by_paren(paren_string: str) -> Optional[str]:
    """Map a citation's court parenthetical back to a court code.

    The input is the citation string, usually something like "2d Cir". It
    does not work on SCOTUS, since that court lacks parentheticals, and needs
    to be handled after disambiguation has been completed.

    Returns:
      - the "id" of the first entry in ``courts`` whose "citation_string"
        starts with the punctuation-stripped input
      - None if the stripped input is empty or no court matches
    """
    court_str = strip_punct(paren_string)
    if not court_str:
        return None

    # Prefix matching is used because citations are often missing the final
    # period, e.g. "2d Cir".
    return next(
        (
            court["id"]
            for court in courts
            if court["citation_string"].startswith(court_str)
        ),
        None,
    )
def _match_shortform_citation(
    citation: ShortformCitation,
    citation_matches: List[Opinion],
) -> Optional[Opinion]:
    """Resolve a short form citation against the opinions matched so far.

    We first try to match by using the reporter and volume number. Because
    matches made using this heuristic may not be unique, we then refine by
    using the antecedent guess and only accept the match if there is a single
    unique candidate. This refinement may still fail (because the guess could
    be meaningless), in which case the citation is not resolvable.

    Returns:
      - the matched Opinion, or None if the citation cannot be resolved
    """
    # A set is used so that an opinion that was matched several times only
    # counts as one candidate.
    candidates = {
        cm
        for cm in citation_matches
        if any(
            citation.reporter == c.reporter and citation.volume == c.volume
            for c in cm.cluster.citations.all()
        )
    }
    if len(candidates) == 1:
        # Accept the match!
        return next(iter(candidates))
    return filter_by_matching_antecedent(candidates, citation.antecedent_guess)


def _match_full_citation(
    citation: Citation,
    citing_opinion: Opinion,
) -> Optional[Opinion]:
    """Resolve a regular citation directly to an Opinion in the database.

    The citation is only accepted when ``match_citation`` returns exactly one
    result and that result's "id" can be loaded as an Opinion.

    Returns:
      - the matched Opinion, or None if the citation cannot be resolved
    """
    matches = match_citation(citation, citing_doc=citing_opinion)
    if len(matches) != 1:
        # No match, or an ambiguous match, found for citation
        return None
    try:
        return Opinion.objects.get(pk=matches[0]["id"])
    except (Opinion.DoesNotExist, Opinion.MultipleObjectsReturned):
        # Zero or multiple Opinions returned. Press on.
        return None


def get_citation_matches(
    citing_opinion: Opinion,
    citations: List[Union[NonopinionCitation, Citation]],
) -> List[Opinion]:
    """For a list of Citation objects (e.g., FullCitations, SupraCitations,
    IdCitations, etc.), try to match them to Opinion objects in the database
    using a variety of heuristics.

    Every citation that is matched also gets ``match_url`` and ``match_id``
    set on it so that they can later be used for generating inline html.

    Returns:
      - a list of Opinion objects, as matched to citations
    """
    citation_matches: List[Opinion] = []  # List of matches to return
    was_matched = False  # Whether the previous citation match was successful

    for citation in citations:
        matched_opinion = None

        # If the citation is to a non-opinion document, we currently cannot
        # match these.
        if isinstance(citation, NonopinionCitation):
            pass

        # If the citation is an id citation, just resolve it to the opinion
        # that was matched immediately prior (so long as the previous match
        # was successful).
        elif isinstance(citation, IdCitation):
            if was_matched:
                matched_opinion = citation_matches[-1]

        # If the citation is a supra citation, try to resolve it to one of
        # the citations that has already been matched. The only clue we have
        # is the guess of what the supra citation's antecedent is, so we try
        # to match that string to the case names of the already matched
        # opinions. If no match, or more than one match, is found, then the
        # supra reference is effectively dropped. Going through
        # filter_by_matching_antecedent also rejects an empty guess, which
        # would otherwise be a substring of every case name.
        elif isinstance(citation, SupraCitation):
            matched_opinion = filter_by_matching_antecedent(
                citation_matches, citation.antecedent_guess
            )

        # Likewise, if the citation is a short form citation, try to resolve
        # it to one of the citations that has already been matched
        elif isinstance(citation, ShortformCitation):
            matched_opinion = _match_shortform_citation(
                citation, citation_matches
            )

        # Otherwise, the citation is just a regular citation, so try to match
        # it directly to an opinion
        else:
            matched_opinion = _match_full_citation(citation, citing_opinion)

        # If an opinion was successfully matched, add it to the list and
        # set the match fields on the original citation object so that they
        # can later be used for generating inline html
        if matched_opinion:
            was_matched = True
            citation_matches.append(matched_opinion)
            citation.match_url = matched_opinion.cluster.get_absolute_url()
            citation.match_id = matched_opinion.pk
        else:
            was_matched = False

    return citation_matches