예제 #1
0
def make_name_param(defendant: str, plaintiff: str = None) -> Tuple[str, int]:
    """Build a punctuation-free name string from the party names.

    The defendant, and the plaintiff when one is given, are split on
    whitespace; each token is passed through ``strip_punct`` and the cleaned
    tokens are joined with single spaces.

    Args:
        defendant: Name of the defendant.
        plaintiff: Optional name of the plaintiff; skipped when falsy.

    Returns:
        A tuple of the joined string and the number of tokens. The count is
        the number of whitespace-separated input tokens; tokens are not
        dropped after cleaning.
    """
    raw_tokens = defendant.split()
    if plaintiff:
        raw_tokens += plaintiff.split()

    # Strip out punctuation, which Solr doesn't like
    cleaned_tokens = list(map(strip_punct, raw_tokens))
    return " ".join(cleaned_tokens), len(cleaned_tokens)
예제 #2
0
def filter_by_matching_antecedent(
    opinion_candidates: Iterable[Opinion],
    antecedent_guess: Optional[str],
) -> Optional[Opinion]:
    """Pick the single candidate whose case name contains the guess.

    The guess is cleaned with ``strip_punct`` and then looked for as a
    substring of ``best_case_name(o.cluster)`` for every candidate.

    Args:
        opinion_candidates: Opinions to choose from; the same opinion may
            appear more than once.
        antecedent_guess: Guessed antecedent text, or None/empty when there
            is no guess.

    Returns:
        The matching opinion when exactly one distinct candidate matches;
        None when there is no usable guess, no match, or more than one
        distinct match.
    """
    if not antecedent_guess:
        return None

    antecedent_guess = strip_punct(antecedent_guess)
    if not antecedent_guess:
        # Nothing is left after cleaning. The substring test below would be
        # vacuous, because the empty string is contained in every case name,
        # so treat this the same as having no guess at all.
        return None

    # A set drops duplicates of the same opinion, so only distinct
    # candidates are counted.
    candidates = {
        o
        for o in opinion_candidates
        if antecedent_guess in best_case_name(o.cluster)
    }

    # Only accept if exactly one candidate remains
    return candidates.pop() if len(candidates) == 1 else None
예제 #3
0
def get_court_by_paren(paren_string: str) -> Optional[str]:
    """Map a citation's court string, e.g. "2d Cir", back to a court code.

    Does not work on SCOTUS, since that court lacks parentheticals, and
    needs to be handled after disambiguation has been completed.

    Args:
        paren_string: The court text taken from the citation.

    Returns:
        The ``id`` of the first entry in ``courts`` whose ``citation_string``
        starts with the cleaned input, or None when the cleaned input is
        empty or nothing matches.
    """
    cleaned = strip_punct(paren_string)
    if not cleaned:
        return None

    # A prefix test is used because citations are often missing the final
    # period, e.g. "2d Cir". The first matching court wins.
    # NOTE(review): the input has been through strip_punct but
    # citation_string is compared as-is, so abbreviations with interior
    # punctuation presumably cannot match -- confirm against the courts data.
    return next(
        (
            entry["id"]
            for entry in courts
            if entry["citation_string"].startswith(cleaned)
        ),
        None,
    )
예제 #4
0
def get_citation_matches(
    citing_opinion: Opinion,
    citations: List[Union[NonopinionCitation, Citation]],
) -> List[Opinion]:
    """Resolve a sequence of citations to the Opinion objects they refer to.

    Citations are processed in order, because id, supra and short form
    citations are resolved against the opinions matched earlier in the same
    list. Every citation that is resolved gets ``match_url`` and ``match_id``
    set on it so they can later be used for generating inline html.

    Args:
        citing_opinion: The opinion containing the citations; passed to
            ``match_citation`` as ``citing_doc``.
        citations: The citation objects (e.g., FullCitations,
            SupraCitations, IdCitations, etc.) to resolve.

    Returns:
        The matched Opinion objects, in the order they were matched.
        Unresolvable citations contribute nothing, and an opinion appears
        once for every citation that resolved to it.
    """
    resolved = []  # Opinions matched so far, in citation order
    previous_resolved = False  # Did the citation just before this one match?

    for citation in citations:
        hit = None

        if isinstance(citation, NonopinionCitation):
            # Citations to non-opinion documents cannot currently be matched.
            pass

        elif isinstance(citation, IdCitation):
            # An id citation points at whatever was matched immediately
            # before it, provided that previous match succeeded.
            if previous_resolved:
                hit = resolved[-1]

        elif isinstance(citation, SupraCitation):
            # The only clue for a supra citation is the guess at its
            # antecedent, so look for that string in the case names of the
            # opinions already matched. Case names can look alike, so the
            # result may not be unique; with zero or several distinct hits
            # the supra reference is effectively dropped.
            name_hits = [
                prior
                for prior in resolved
                if strip_punct(citation.antecedent_guess)
                in best_case_name(prior.cluster)
            ]
            distinct_hits = set(name_hits)  # Remove duplicate matches
            if len(distinct_hits) == 1:
                hit = distinct_hits.pop()

        elif isinstance(citation, ShortformCitation):
            # First narrow the already matched opinions by reporter and
            # volume number.
            volume_hits = [
                prior
                for prior in resolved
                for known in prior.cluster.citations.all()
                if citation.reporter == known.reporter
                and citation.volume == known.volume
            ]
            distinct_hits = set(volume_hits)  # Remove duplicate matches
            if len(distinct_hits) == 1:
                hit = distinct_hits.pop()
            else:
                # Not unique, so refine with the antecedent guess and accept
                # only a single remaining candidate. The guess could be
                # meaningless, in which case the citation is not resolvable
                # and is dropped.
                refined_hits = {
                    prior
                    for prior in distinct_hits
                    if strip_punct(citation.antecedent_guess)
                    in best_case_name(prior.cluster)
                }
                if len(refined_hits) == 1:
                    hit = refined_hits.pop()

        else:
            # A regular citation: try to match it directly to an opinion,
            # accepting only a single unambiguous result.
            found = match_citation(citation, citing_doc=citing_opinion)
            if len(found) == 1:
                opinion_pk = found[0]["id"]
                try:
                    hit = Opinion.objects.get(pk=opinion_pk)
                except (
                    Opinion.DoesNotExist,
                    Opinion.MultipleObjectsReturned,
                ):
                    # No Opinion, or several, came back. Press on.
                    pass

        # Record a successful match and set the match fields on the original
        # citation object so that they can later be used for generating
        # inline html.
        previous_resolved = bool(hit)
        if previous_resolved:
            resolved.append(hit)
            citation.match_url = hit.cluster.get_absolute_url()
            citation.match_id = hit.pk

    return resolved