Python strip_punct 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: eyecite.utils

메소드/함수: strip_punct

hotexamples.com의 예제 수: 4

Python strip_punct - 예제 4개를 찾았습니다. 오픈소스 프로젝트에서 추출한 Python eyecite.utils.strip_punct의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: match_citations.py 프로젝트: nathreed/courtlistener

def make_name_param(
    defendant: str,
    plaintiff: Optional[str] = None,
) -> Tuple[str, int]:
    """Remove punctuation and return cleaned string plus its length in tokens.

    The defendant name (and the plaintiff name, when given) is split on
    whitespace and every token is passed through ``strip_punct``. Tokens
    that are empty after stripping (i.e. they consisted only of
    punctuation) are dropped, so they neither leave stray spaces in the
    query string nor inflate the token count.

    Args:
        defendant: The defendant part of the case name.
        plaintiff: The plaintiff part of the case name, if any.

    Returns:
        A ``(query, token_count)`` tuple: the surviving tokens joined by
        single spaces, and how many tokens there are.
    """
    token_list = defendant.split()
    if plaintiff:
        token_list.extend(plaintiff.split())

    # Strip out punctuation, which Solr doesn't like, and discard any token
    # that has nothing left once its punctuation is gone.
    stripped_tokens = (strip_punct(t) for t in token_list)
    query_words = [word for word in stripped_tokens if word]
    return " ".join(query_words), len(query_words)

예제 #2

파일 보기

파일: match_citations.py 프로젝트: nathreed/courtlistener

def filter_by_matching_antecedent(
    opinion_candidates: Iterable[Opinion],
    antecedent_guess: Optional[str],
) -> Optional[Opinion]:
    """Pick the one candidate opinion whose case name contains the guess.

    The guess is cleaned with ``strip_punct`` and then looked for as a
    substring of each candidate's best case name.

    Returns None when the guess is missing or empty, or when the number of
    distinct matching candidates is anything other than exactly one.
    """
    if not antecedent_guess:
        return None

    cleaned_guess = strip_punct(antecedent_guess)
    matching: List[Opinion] = [
        opinion
        for opinion in opinion_candidates
        if cleaned_guess in best_case_name(opinion.cluster)
    ]

    # Collapse duplicates; an empty or ambiguous result is rejected.
    distinct = set(matching)
    if len(distinct) != 1:
        return None
    return distinct.pop()

예제 #3

파일 보기

파일: helpers.py 프로젝트: pombredanne/eyecite

def get_court_by_paren(paren_string: str) -> Optional[str]:
    """Map a citation's parenthetical court string to a court code.

    The input is usually something like "2d Cir". It is cleaned with
    ``strip_punct`` and compared against each known court's citation
    string; the id of the first court that matches is returned, or None
    when the cleaned string is empty or nothing matches.

    Does not work on SCOTUS, since that court lacks parentheticals, and
    needs to be handled after disambiguation has been completed.
    """
    cleaned = strip_punct(paren_string)
    if not cleaned:
        return None

    # Prefix comparison, because citations are often missing the final
    # period, e.g. "2d Cir".
    return next(
        (
            entry["id"]
            for entry in courts
            if entry["citation_string"].startswith(cleaned)
        ),
        None,
    )

예제 #4

파일 보기

파일: match_citations.py 프로젝트: weiplanet/courtlistener

def _unique_antecedent_match(
    candidates: List[Opinion],
    antecedent_guess: Union[str, None],
) -> Union[Opinion, None]:
    """Return the single candidate whose case name contains the guess.

    Returns None when the guess is missing or empty (before or after
    punctuation is stripped), or when the number of distinct matching
    candidates is not exactly one.
    """
    if not antecedent_guess:
        return None

    guess = strip_punct(antecedent_guess)
    if not guess:
        # An empty string is a substring of every case name, so it carries
        # no information and must not be allowed to select a candidate.
        return None

    # A set removes duplicate matches before the uniqueness check.
    matching = {
        cm for cm in candidates if guess in best_case_name(cm.cluster)
    }
    return matching.pop() if len(matching) == 1 else None


def get_citation_matches(
    citing_opinion: Opinion,
    citations: List[Union[NonopinionCitation, Citation]],
) -> List[Opinion]:
    """For a list of Citation objects (e.g., FullCitations, SupraCitations,
    IdCitations, etc.), try to match them to Opinion objects in the database
    using a variety of heuristics.

    Each citation that is matched also gets ``match_url`` and ``match_id``
    set on it, so that they can later be used for generating inline html.

    Args:
        citing_opinion: The opinion containing the citations; passed to
            ``match_citation`` as ``citing_doc``.
        citations: The citations to resolve, in the order they appear.

    Returns:
      - a list of Opinion objects, as matched to citations
    """
    citation_matches: List[Opinion] = []  # List of matches to return
    was_matched = False  # Whether the previous citation match was successful

    for citation in citations:
        matched_opinion = None

        # If the citation is to a non-opinion document, we currently cannot
        # match these.
        if isinstance(citation, NonopinionCitation):
            pass

        # If the citation is an id citation, just resolve it to the opinion
        # that was matched immediately prior (so long as the previous match
        # was successful).
        elif isinstance(citation, IdCitation):
            if was_matched:
                matched_opinion = citation_matches[-1]

        # If the citation is a supra citation, try to resolve it to one of
        # the citations that has already been matched. The only clue we have
        # is the guess of what the supra citation's antecedent is, so we
        # match that string against the case names of the already matched
        # opinions. Because case names might look alike, this may not be
        # unique; if no match, or more than one match, is found, the supra
        # reference is effectively dropped.
        elif isinstance(citation, SupraCitation):
            matched_opinion = _unique_antecedent_match(
                citation_matches, citation.antecedent_guess
            )

        # Likewise, if the citation is a short form citation, try to resolve
        # it to one of the citations that has already been matched
        elif isinstance(citation, ShortformCitation):
            # We first try to match by using the reporter and volume number.
            # The set comprehension removes duplicate matches.
            candidates = list(
                {
                    cm
                    for cm in citation_matches
                    if any(
                        citation.reporter == c.reporter
                        and citation.volume == c.volume
                        for c in cm.cluster.citations.all()
                    )
                }
            )

            if len(candidates) == 1:
                # Accept the match!
                matched_opinion = candidates[0]
            else:
                # The reporter/volume heuristic was not unique, so refine by
                # the antecedent guess and only accept a single unique
                # candidate. This may still fail (because the guess could be
                # meaningless), in which case the citation is dropped.
                matched_opinion = _unique_antecedent_match(
                    candidates, citation.antecedent_guess
                )

        # Otherwise, the citation is just a regular citation, so try to match
        # it directly to an opinion
        else:
            matches = match_citation(citation, citing_doc=citing_opinion)

            if len(matches) == 1:
                match_id = matches[0]["id"]
                try:
                    matched_opinion = Opinion.objects.get(pk=match_id)
                except (Opinion.DoesNotExist, Opinion.MultipleObjectsReturned):
                    # Zero or several Opinions returned. Press on.
                    pass

        # If an opinion was successfully matched, add it to the list and
        # set the match fields on the original citation object so that they
        # can later be used for generating inline html
        if matched_opinion:
            was_matched = True
            citation_matches.append(matched_opinion)
            citation.match_url = matched_opinion.cluster.get_absolute_url()
            citation.match_id = matched_opinion.pk
        else:
            was_matched = False

    return citation_matches