コード例 #1
0
ファイル: __init__.py プロジェクト: NelsonPython/TextAnalyzer
    def get_report(self, doc):
        """
        Report terms that favor native speakers and print suggested swaps.

        Each lower-cased word of the document is searched with every entry
        of ``NATIVE_WORDS.Used`` (treated as a regular expression). For every
        hit, the suggestions stored under ``Recommend1`` / ``Recommend2`` are
        printed and a flag spanning the word is added to the report.

        Arguments:
            doc (Document): The document to check

        Returns:
            Report

        """
        native_report = Report("\nTerms biased towards native speakers:")

        has_match = False
        for token, start, stop in doc.words_with_indices():
            lowered = token.lower()
            for pattern in NATIVE_WORDS.Used:
                match = re.search(pattern, lowered)
                if not match:
                    continue

                has_match = True
                term = match.group()
                # Selector for the row(s) whose 'Used' entry equals the
                # matched text. NOTE(review): NATIVE_WORDS is presumably a
                # pandas DataFrame, in which case ``.item()`` below requires
                # the selector to pick exactly one row -- confirm.
                rows = NATIVE_WORDS['Used'] == term

                # A second recommendation of 'none' means "no alternative".
                second_choice = NATIVE_WORDS['Recommend2'].loc[rows].item()
                if second_choice == 'none':
                    alternative = ""
                else:
                    alternative = " or '" + second_choice + "'"

                first_choice = NATIVE_WORDS['Recommend1'].loc[rows].item()
                print("Consider replacing '", term, "' with '", first_choice,
                      "'", alternative)
                native_report.add_flag(Flag(start, stop, Issue(lowered)))

        # The summary is only attached when at least one term was flagged.
        if has_match:
            native_report.set_summary(
                "To encourage non-native speakers, use short words and simple sentences"
            )
        return native_report
コード例 #2
0
    def get_report(self, doc):
        """
        Flag every word of the document that appears in GENDERED_WORDS.

        Words are compared in lower case. One flag is added per matching
        word, and a summary with replacement advice is set when at least one
        word was flagged.

        Arguments:
            doc (Document): The document to check

        Returns:
            Report

        """
        gender_report = Report("\nUnnecessary use of gender terms")

        hits = 0
        for token, begin, end in doc.words_with_indices():
            # Guard clause: skip anything that is not a listed gender term.
            if token.lower() not in GENDERED_WORDS:
                continue
            hits += 1
            # The flag carries the word exactly as it appears in the text.
            gender_report.add_flag(
                Flag(begin, end, Issue("{}".format(token))))

        if hits:
            gender_report.set_summary(
                "Replace gender terms with 'person' or 'individual', or a position-specific term, such as 'doctor' or 'author'"
            )
        return gender_report
コード例 #3
0
ファイル: __init__.py プロジェクト: NelsonPython/TextAnalyzer
    def get_report(self, doc):
        """
        Flag words of the document that appear in PERSONAL_LIFE_TERMS.

        Words are compared in lower case. One flag is added per matching
        word, and a summary is set when at least one word was flagged.

        Arguments:
            doc (Document): The document to check

        Returns:
            Report

        """
        personal_report = Report("\nTerms about personal life")

        # Collect the matching (word, start, stop) triples first ...
        matches = [
            (token, begin, end)
            for token, begin, end in doc.words_with_indices()
            if token.lower() in PERSONAL_LIFE_TERMS
        ]

        # ... then turn each of them into a flag, in document order.
        for token, begin, end in matches:
            personal_report.add_flag(
                Flag(begin, end, Issue("{}".format(token))))

        if matches:
            personal_report.set_summary('Found words relating to personal life')
        return personal_report
コード例 #4
0
ファイル: __init__.py プロジェクト: JING1201/gender-bias
    def get_report(self, doc):
        """
        Generates a report on the text based upon effort vs accomplishment.

        Every effort word is added to the report as a flag. Accomplishment
        words are only counted: their flags are built but not added to the
        report. A summary is set if there are NO words about accomplishment,
        or if the ratio of effort words to accomplishment words is above 1.2.

        Arguments:
            doc (Document): The document to check

        Returns:
            Report

        """
        report = Report("Effort vs Accomplishment")
        effort_flags = []
        accomplishment_flags = []

        for word, start, stop in doc.words_with_indices():
            lowered = word.lower()
            if lowered in EFFORT_WORDS:
                effort_flags.append(
                    Flag(
                        start, stop,
                        Issue(
                            "Effort vs Accomplishment",
                            "The word '{word}' tends to speak about effort more than accomplishment."
                            .format(word=word),
                            "Try replacing with phrasing that emphasizes accomplishment.",
                            bias=Issue.negative_result)))
            # Deliberately not `elif`: a word listed in both sets counts
            # towards both tallies.
            if lowered in ACCOMPLISHMENT_WORDS:
                accomplishment_flags.append(
                    Flag(
                        start, stop,
                        Issue(
                            "Effort vs Accomplishment",
                            "The word '{word}' tends to speak about accomplishment more than effort."
                            .format(word=word),
                            bias=Issue.positive_result)))

        for flag in effort_flags:
            report.add_flag(flag)

        # Test emptiness by truthiness rather than `len(...) is 0`: identity
        # comparison with an int literal relies on an interpreter detail and
        # raises a SyntaxWarning on Python 3.8+. Checking this first also
        # protects the division below from a zero denominator.
        if not accomplishment_flags:
            report.set_summary(
                "This document has too few words about concrete accomplishment."
            )
        elif len(effort_flags) / len(accomplishment_flags) > 1.2:  # TODO: Arbitrary!
            report.set_summary(
                "This document has a high ratio ({}:{}) of words suggesting effort to words suggesting concrete accomplishment."
                .format(len(effort_flags), len(accomplishment_flags)))

        return report
コード例 #5
0
ファイル: __init__.py プロジェクト: NelsonPython/TextAnalyzer
    def get_report(self, doc):
        """
        Generates a report on the text based upon effort vs accomplishment.

        Every effort word is added to the report as a flag. Accomplishment
        words are only counted: their flags are built but not added to the
        report. A summary is set if there are NO words about accomplishment,
        or if the ratio of effort words to accomplishment words is above 1.2.

        Arguments:
            doc (Document): The document to check

        Returns:
            Report

        """
        report = Report("\nTerms focusing on effort vs accomplishment")
        effort_flags = []
        accomplishment_flags = []

        for word, start, stop in doc.words_with_indices():
            lowered = word.lower()
            if lowered in EFFORT_WORDS:
                effort_flags.append(
                    Flag(
                        start, stop,
                        Issue("{word}".format(word=word),
                              bias=Issue.negative_result)))
            # Deliberately not `elif`: a word listed in both sets counts
            # towards both tallies.
            if lowered in ACCOMPLISHMENT_WORDS:
                accomplishment_flags.append(
                    Flag(
                        start, stop,
                        Issue(
                            "'{word}' describes accomplishment more than effort"
                            .format(word=word),
                            bias=Issue.positive_result)))

        for flag in effort_flags:
            report.add_flag(flag)

        # Test emptiness by truthiness rather than `len(...) is 0`: identity
        # comparison with an int literal relies on an interpreter detail and
        # raises a SyntaxWarning on Python 3.8+. Checking this first also
        # protects the division below from a zero denominator.
        if not accomplishment_flags:
            report.set_summary(
                "Too few words about concrete accomplishments")
        elif len(effort_flags) / len(accomplishment_flags) > 1.2:  # TODO: Arbitrary!
            # Summary text fixed: the original read "ratherthan".
            report.set_summary(
                "High ratio ({}:{}) of words suggesting effort rather than accomplishment."
                .format(len(effort_flags), len(accomplishment_flags)))

        return report
コード例 #6
0
    def get_report(self, doc):
        """
        Build the "Publications" report for a document.

        The report carries no flags; its summary is whatever
        ``self.get_summary(doc)`` returns.

        Arguments:
            doc (Document): The document to check

        Returns:
            Report

        """
        publications_report = Report("Publications")
        summary_text = self.get_summary(doc)
        publications_report.set_summary(summary_text)
        return publications_report
コード例 #7
0
ファイル: __init__.py プロジェクト: rt-hamilton/gender-bias
    def get_report(self, doc):
        """
        Flag words that begin with one of the entries in MALE_WORDS.

        Each lower-cased word of the document is tested against every entry.
        A flag is added for every (word, entry) hit, so a word matching
        several entries is flagged several times.

        Arguments:
            doc (Document): The document to check

        Returns:
            Report

        """
        report = Report("\nTerms biased towards men:")

        flagged_any = False
        for token, start, stop in doc.words_with_indices():
            lowered = token.lower()
            for stem in MALE_WORDS:
                # The entry is used as a regular expression anchored at the
                # start of the word; when it is a plain literal the word must
                # continue for at least two more characters after it.
                if re.search("^" + stem + "..", lowered):
                    flagged_any = True
                    report.add_flag(
                        Flag(start, stop, Issue("{}".format(lowered))))

        if flagged_any:
            report.set_summary(
                "Depending on context, these words may be biased towards recruiting men"
            )
        return report
コード例 #8
0
ファイル: __init__.py プロジェクト: gender-bias/gender-bias
    def get_report(self, doc: Document):
        """
        Generates a report on the text based upon effort vs accomplishment.

        Two passes are made over the document:

        1. Every word found in EFFORT_WORDS / ACCOMPLISHMENT_WORDS is flagged
           with a half-weight bias, because its mere presence does not mean
           it is attributed to the subject of the letter.
        2. The text is parsed with ``nlp`` and adjectival complements that
           share a head with a non-ignored pronoun or proper noun are
           flagged at sentence level with full-weight bias. Only this pass
           feeds the effort / accomplishment counters used for the summary.

        Arguments:
            doc (Document): The document to check

        Returns:
            Report

        """
        report = Report("Effort vs Accomplishment")

        # Keep track of accomplishment- or effort-specific words:
        accomplishment_words = 0
        effort_words = 0

        # Keep track of flags (we'll deduplicate them before reporting)
        flags = set()

        for word, start, stop in doc.words_with_indices():
            if word.lower() in EFFORT_WORDS:
                report.add_flag(
                    Flag(
                        start,
                        stop,
                        Issue(
                            "Effort vs Accomplishment",
                            f"The word '{word}' tends to speak more about " +
                            "effort than concrete accomplishment.",
                            # lower negative bias because this may be spurious.
                            # Specifically, the presence of these words doesn't
                            # mean that it's being attributed to the subject of
                            # the letter.
                            bias=Issue.negative_result * 0.5,
                            fix="Speak about concrete achievement rather " +
                            "than abstract effort.",
                        ),
                    ))
            if word.lower() in ACCOMPLISHMENT_WORDS:
                report.add_flag(
                    Flag(
                        start,
                        stop,
                        Issue(
                            "Effort vs Accomplishment",
                            f"The word '{word}' illustrates concrete accomplishment.",
                            # lower positive valence because this may be spurious.
                            # Specifically, the presence of these words doesn't
                            # mean that it's being attributed to the subject of
                            # the letter.
                            bias=Issue.positive_result * 0.5,
                        ),
                    ))

        # Parse into a separate name instead of rebinding the `doc` parameter,
        # so the Document and its parsed form are not confused.
        parsed_doc = nlp(doc.text())

        # Loop over tokens to find adjectives to flag:
        for token in parsed_doc:
            # Find all tokens whose dependency tag is adjectival complement:
            if token.dep_ == "acomp":
                # Get all dependencies of the head/root of the tagged sentence
                # and look for nouns (which are likely to be the referenced
                # subject of this adjectival complement):
                for reference_token in token.head.children:
                    # Only pronouns / proper nouns that are not on the ignore
                    # list count as a reference to the subject.
                    if (reference_token.pos_ in ["PRON", "PROPN"] and
                            reference_token.text not in _PRONOUNS_TO_IGNORE):
                        # NOTE(review): unlike the first pass, token.text is
                        # compared without lower-casing -- confirm intended.
                        # If accomplishment-flavored, add positive flag.
                        if token.text in ACCOMPLISHMENT_WORDS:
                            accomplishment_words += 1
                            warning = (
                                f"The word '{token.text}' refers to " +
                                "explicit accomplishment rather than effort.")
                            suggestion = ""
                            bias = Issue.positive_result

                        # If effort-flavored, add negative flag.
                        elif token.text in EFFORT_WORDS:
                            effort_words += 1
                            warning = (
                                f"The word '{token.text}' tends to speak " +
                                "about effort more than accomplishment.")
                            suggestion = ("Try replacing with phrasing that " +
                                          "emphasizes accomplishment.")
                            bias = Issue.negative_result

                        else:
                            continue

                        # Flags span the whole sentence; the set removes
                        # identical (span, text, bias) duplicates.
                        flags.add((
                            token.sent.start_char,
                            token.sent.end_char,
                            warning,
                            suggestion,
                            bias,
                        ))

        for (start, stop, warning, suggestion, bias) in flags:
            # Add a flag to the report:
            report.add_flag(
                Flag(
                    start,
                    stop,
                    Issue("Effort vs Accomplishment",
                          warning,
                          suggestion,
                          bias=bias),
                ))

        # NOTE(review): this fires when effort words do NOT outnumber
        # accomplishment words, which looks inverted relative to the
        # "high ratio" wording below -- confirm the intended condition.
        if 0 < effort_words <= accomplishment_words:
            # Fixed: the accomplishment count was previously rendered with
            # `effort_words`, so both numbers in the summary were the same.
            report.set_summary(
                "This document has a high ratio of words suggesting " +
                f"effort ({effort_words}) to words suggesting " +
                f"concrete accomplishment ({accomplishment_words}).", )

        return report