Beispiel #1
0
    def get_report(self, doc):
        """
        Generate a report on the text based upon effort vs accomplishment.

        Every word found in EFFORT_WORDS is flagged in the report. Words found
        in ACCOMPLISHMENT_WORDS are only counted; their flags are built but
        not added to the report.

        A summary is added if there are NO words about accomplishment, or if
        the ratio of effort words to accomplishment words is high (> 1.2).

        Arguments:
            doc (Document): The document to check

        Returns:
            Report

        """
        report = Report("Effort vs Accomplishment")
        effort_flags = []
        accomplishment_flags = []

        for word, start, stop in doc.words_with_indices():
            lowered = word.lower()
            if lowered in EFFORT_WORDS:
                effort_flags.append(
                    Flag(
                        start, stop,
                        Issue(
                            "Effort vs Accomplishment",
                            "The word '{word}' tends to speak about effort more than accomplishment."
                            .format(word=word),
                            "Try replacing with phrasing that emphasizes accomplishment.",
                            bias=Issue.negative_result)))
            # Deliberately not `elif`: a word may appear in both word lists.
            if lowered in ACCOMPLISHMENT_WORDS:
                accomplishment_flags.append(
                    Flag(
                        start, stop,
                        Issue(
                            "Effort vs Accomplishment",
                            "The word '{word}' tends to speak about accomplishment more than effort."
                            .format(word=word),
                            bias=Issue.positive_result)))

        for flag in effort_flags:
            report.add_flag(flag)

        effort_count = len(effort_flags)
        accomplishment_count = len(accomplishment_flags)

        # Compare with `==`, not `is`: integer identity is an implementation
        # detail. Checking for zero first also avoids dividing by zero below.
        if accomplishment_count == 0:
            report.set_summary(
                "This document has too few words about concrete accomplishment."
            )
        elif effort_count / accomplishment_count > 1.2:  # TODO: Arbitrary!
            report.set_summary(
                "This document has a high ratio ({}:{}) of words suggesting effort to words suggesting concrete accomplishment."
                .format(effort_count, accomplishment_count))

        return report
Beispiel #2
0
def test_flag_stringify():
    """Check str(Flag) for issues with a fix, with only a description, and bare."""
    cases = [
        (Issue("Issue", "Description.", fix="Fix Me!"),
         "[100-200]: Issue: Description. (Fix Me!)"),
        (Issue("Issue", "Description."),
         "[100-200]: Issue: Description."),
        (Issue("Issue"),
         "[100-200]: Issue"),
    ]
    for issue, expected in cases:
        assert str(Flag(100, 200, issue)) == expected
Beispiel #3
0
    def get_report(self, doc):
        """
        Generate a report on the text based upon effort vs accomplishment.

        Every word found in EFFORT_WORDS is flagged in the report. Words found
        in ACCOMPLISHMENT_WORDS are only counted; their flags are built but
        not added to the report.

        A summary is added if there are NO words about accomplishment, or if
        the ratio of effort words to accomplishment words is high (> 1.2).

        Arguments:
            doc (Document): The document to check

        Returns:
            Report

        """
        report = Report("\nTerms focusing on effort vs accomplishment")
        effort_flags = []
        accomplishment_flags = []

        for word, start, stop in doc.words_with_indices():
            lowered = word.lower()
            if lowered in EFFORT_WORDS:
                effort_flags.append(
                    Flag(
                        start, stop,
                        Issue("{word}".format(word=word),
                              bias=Issue.negative_result)))
            # Deliberately not `elif`: a word may appear in both word lists.
            if lowered in ACCOMPLISHMENT_WORDS:
                accomplishment_flags.append(
                    Flag(
                        start, stop,
                        Issue(
                            "'{word}' describes accomplishment more than effort"
                            .format(word=word),
                            bias=Issue.positive_result)))

        for flag in effort_flags:
            report.add_flag(flag)

        effort_count = len(effort_flags)
        accomplishment_count = len(accomplishment_flags)

        # Compare with `==`, not `is`: integer identity is an implementation
        # detail. Checking for zero first also avoids dividing by zero below.
        if accomplishment_count == 0:
            report.set_summary(
                "Too few words about concrete accomplishments")
        elif effort_count / accomplishment_count > 1.2:  # TODO: Arbitrary!
            report.set_summary(
                "High ratio ({}:{}) of words suggesting effort rather than accomplishment."
                .format(effort_count, accomplishment_count))

        return report
Beispiel #4
0
    def get_report(self, doc):
        """
        Report words that match any pattern in the ``Used`` column of
        NATIVE_WORDS, and print a replacement suggestion for each match.

        Each lower-cased word of the document is searched with every ``Used``
        entry as a regular expression. For a match, the ``Recommend1`` (and,
        unless it is the string 'none', ``Recommend2``) value of the row whose
        ``Used`` equals the matched text is printed, and the word is flagged.

        Arguments:
            doc (Document): The document to check

        Returns:
            Report
        """
        report = Report("\nTerms biased towards native speakers:")
        any_hit = False

        for raw_word, start, stop in doc.words_with_indices():
            lowered = raw_word.lower()
            for pattern in NATIVE_WORDS.Used:
                hit = re.search(pattern, lowered)
                if not hit:
                    continue

                any_hit = True
                matched = hit.group()
                # Select the row(s) describing the matched text once;
                # .item() requires that selection to be exactly one value.
                row_mask = NATIVE_WORDS['Used'] == matched

                second_choice = NATIVE_WORDS['Recommend2'].loc[row_mask].item()
                if second_choice == 'none':
                    alternative = ""
                else:
                    alternative = " or '" + second_choice + "'"

                first_choice = NATIVE_WORDS['Recommend1'].loc[row_mask].item()
                print("Consider replacing '", matched, "' with '",
                      first_choice, "'", alternative)

                report.add_flag(Flag(start, stop, Issue(lowered)))

        if any_hit:
            report.set_summary(
                "To encourage non-native speakers, use short words and simple sentences"
            )
        return report
Beispiel #5
0
    def get_flags(self, doc: 'Document') -> List['Flag']:
        """
        Flag a document (globally) when too few research products are found.

        The values returned by ``identify_publications(doc)`` are summed and
        compared against ``self.min_publications``. Summing probabilities is a
        rough proxy for a publication count: two candidates at 50% each are
        treated like one certain publication.

        Returns a list holding one document-wide flag (span 0-0) when the sum
        is below the minimum, otherwise an empty list.
        """
        # TODO: Any other flags needed here?
        probability_mass = sum(identify_publications(doc).values())

        if probability_mass < self.min_publications:
            too_few_publications = Issue(
                "Publications",
                "This document does not mention many publications.",
                "Try referencing more concrete publications or work "
                "byproducts, if possible."
            )
            return [Flag(0, 0, too_few_publications)]

        return []
Beispiel #6
0
    def get_report(self, doc):
        """
        Report every word of the document that appears in GENDERED_WORDS.

        Each hit is flagged with the word itself as the issue name. If at
        least one word was flagged, a summary with replacement advice is set.

        Arguments:
            doc (Document): The document to check

        Returns:
            Report

        """
        report = Report("\nUnnecessary use of gender terms")
        flagged_any = False

        for token, begin, end in doc.words_with_indices():
            # Membership is tested on the lower-cased word; the flag keeps
            # the original casing.
            if word_is_gendered := token.lower() in GENDERED_WORDS:
                flagged_any = word_is_gendered
                issue = Issue("{word}".format(word=token))
                report.add_flag(Flag(begin, end, issue))

        if flagged_any:
            report.set_summary(
                "Replace gender terms with 'person' or 'individual', or a position-specific term, such as 'doctor' or 'author'"
            )
        return report
Beispiel #7
0
    def get_report(self, doc):
        """
        Report the usage of unnecessarily gendered words.

        Every word whose lower-cased form appears in GENDERED_WORDS is flagged
        with a negative-bias issue that suggests a neutral replacement.

        Arguments:
            doc (Document): The document to check

        Returns:
            Report

        """
        report = Report("Unnecessarily Gendered Words")

        for word, start, stop in doc.words_with_indices():
            if word.lower() not in GENDERED_WORDS:
                continue
            report.add_flag(
                Flag(start, stop, Issue(
                    "Unnecessarily Gendered Words",
                    "The word '{word}' is unnecessarily gendered.".format(
                        word=word),
                    "Replace this term with 'person' or 'individual', or a position-specific phrase like 'doctor' or 'author'.",
                    bias=Issue.negative_result
                ))
            )

        return report
Beispiel #8
0
    def get_report(self, doc):
        """
        Build a report flagging words that relate to personal life.

        A word is flagged when its lower-cased form appears in
        PERSONAL_LIFE_TERMS.

        Arguments:
            doc (Document): The document to check

        Returns:
            Report

        """
        report = Report("Personal Life")

        for token, begin, end in doc.words_with_indices():
            if token.lower() not in PERSONAL_LIFE_TERMS:
                continue
            description = "The word {word} tends to relate to personal life.".format(word=token)
            issue = Issue(
                "Personal Life",
                description,
                "Try replacing with a sentiment about professional life."
            )
            report.add_flag(Flag(begin, end, issue))

        return report
Beispiel #9
0
    def get_report(self, doc):
        """
        Build a report flagging curbed (hedged) superlatives in the text.

        Examples are phrases like "the best woman for the job" or "best of
        all women". Every match of every pattern in
        CONDITIONAL_SUPERLATIVE_REGEXES against the document text is flagged
        over the span of the match.

        Arguments:
            doc (Document): The document to check

        Returns:
            Report

        """
        report = Report("Conditional Superlatives")
        full_text = doc.text()

        for pattern in CONDITIONAL_SUPERLATIVE_REGEXES:
            for hit in re.finditer(pattern, full_text):
                begin, end = hit.span()
                issue = Issue(
                    "Conditional Superlative",
                    "This phrase appears to hedge a superlative to apply only to women."
                )
                report.add_flag(Flag(begin, end, issue))

        return report
Beispiel #10
0
    def get_report(self, doc):
        """
        Build a report flagging words that relate to personal life.

        A word is flagged, with the word itself as the issue name, when its
        lower-cased form appears in PERSONAL_LIFE_TERMS. A summary is set if
        at least one word was flagged.

        Arguments:
            doc (Document): The document to check

        Returns:
            Report

        """
        report = Report("\nTerms about personal life")
        flagged_any = False

        for token, begin, end in doc.words_with_indices():
            # Look the lower-cased word up in the list of PERSONAL_LIFE_TERMS.
            if token.lower() not in PERSONAL_LIFE_TERMS:
                continue
            flagged_any = True
            issue = Issue("{word}".format(word=token))
            report.add_flag(Flag(begin, end, issue))

        if flagged_any:
            report.set_summary('Found words relating to personal life')
        return report
Beispiel #11
0
    def get_report(self, doc):
        """
        Report words that match an entry of MALE_WORDS.

        Each lower-cased word is searched with the pattern
        ``"^" + entry + ".."`` for every entry of MALE_WORDS; a flag is added
        for every entry that matches, so one word can be flagged repeatedly.
        A summary is set if anything was flagged.

        Arguments:
            doc (Document): The document to check

        Returns:
            Report
        """
        report = Report("\nTerms biased towards men:")
        matched_any = False

        for raw_word, start, stop in doc.words_with_indices():
            lowered = raw_word.lower()
            for stem in MALE_WORDS:
                # Assuming plain-text entries, this requires the word to begin
                # with the entry and continue for two more characters.
                if not re.search("^" + stem + "..", lowered):
                    continue
                matched_any = True
                issue = Issue("{word}".format(word=lowered))
                report.add_flag(Flag(start, stop, issue))

        if matched_any:
            report.set_summary(
                "Depending on context, these words may be biased towards recruiting men"
            )
        return report
Beispiel #12
0
    def get_report(self, doc: Document):
        """
        Generate a report on the text based upon effort vs accomplishment.

        Two passes are made over the document:

        1. Every word found in EFFORT_WORDS or ACCOMPLISHMENT_WORDS is flagged
           at half strength, since its mere presence does not mean it refers
           to the subject of the letter.
        2. The text is parsed with ``nlp``. Adjectival complements ("acomp")
           whose head has a pronoun or proper-noun child (other than those in
           _PRONOUNS_TO_IGNORE) are flagged at full strength over the whole
           sentence, and counted as effort or accomplishment words.

        A summary is added when at least one effort word was counted in the
        second pass and the effort count does not exceed the accomplishment
        count.

        Arguments:
            doc (Document): The document to check

        Returns:
            Report

        """
        report = Report("Effort vs Accomplishment")

        # Keep track of accomplishment- or effort-specific words:
        accomplishment_words = 0
        effort_words = 0

        # Keep track of flags (we'll deduplicate them before reporting)
        flags = set()

        for word, start, stop in doc.words_with_indices():
            lowered = word.lower()
            if lowered in EFFORT_WORDS:
                report.add_flag(
                    Flag(
                        start,
                        stop,
                        Issue(
                            "Effort vs Accomplishment",
                            f"The word '{word}' tends to speak more about " +
                            "effort than concrete accomplishment.",
                            # lower negative bias because this may be spurious.
                            # Specifically, the presence of these words doesn't
                            # mean that it's being attributed to the subject of
                            # the letter.
                            bias=Issue.negative_result * 0.5,
                            fix="Speak about concrete achievement rather " +
                            "than abstract effort.",
                        ),
                    ))
            if lowered in ACCOMPLISHMENT_WORDS:
                report.add_flag(
                    Flag(
                        start,
                        stop,
                        Issue(
                            "Effort vs Accomplishment",
                            f"The word '{word}' illustrates concrete accomplishment.",
                            # lower positive valence because this may be spurious.
                            # Specifically, the presence of these words doesn't
                            # mean that it's being attributed to the subject of
                            # the letter.
                            bias=Issue.positive_result * 0.5,
                        ),
                    ))

        # Use a separate name for the parsed text instead of rebinding the
        # `doc` parameter.
        parsed = nlp(doc.text())

        # Loop over tokens to find adjectives to flag:
        for token in parsed:
            # Only tokens whose dependency tag is adjectival complement:
            if token.dep_ != "acomp":
                continue

            # Get all dependencies of the head/root of the tagged sentence
            # and look for pronouns / proper nouns (which are likely to be the
            # referenced subject of this adjectival complement):
            for reference_token in token.head.children:
                # Skip anything that is not a pronoun or proper noun, and
                # skip pronouns that are explicitly ignored.
                if (reference_token.pos_ not in ["PRON", "PROPN"] or
                        reference_token.text in _PRONOUNS_TO_IGNORE):
                    continue

                # If accomplishment-flavored, add positive flag.
                if token.text in ACCOMPLISHMENT_WORDS:
                    accomplishment_words += 1
                    warning = (
                        f"The word '{token.text}' refers to " +
                        "explicit accomplishment rather than effort.")
                    suggestion = ""
                    bias = Issue.positive_result

                # If effort-flavored, add negative flag.
                elif token.text in EFFORT_WORDS:
                    effort_words += 1
                    warning = (
                        f"The word '{token.text}' tends to speak " +
                        "about effort more than accomplishment.")
                    suggestion = ("Try replacing with phrasing that " +
                                  "emphasizes accomplishment.")
                    bias = Issue.negative_result

                else:
                    continue

                # The flag covers the whole sentence; the set collapses
                # identical entries produced by several reference tokens.
                flags.add((
                    token.sent.start_char,
                    token.sent.end_char,
                    warning,
                    suggestion,
                    bias,
                ))

        for (start, stop, warning, suggestion, bias) in flags:
            # Add a flag to the report:
            report.add_flag(
                Flag(
                    start,
                    stop,
                    Issue("Effort vs Accomplishment",
                          warning,
                          suggestion,
                          bias=bias),
                ))

        # NOTE(review): this fires when effort words do NOT outnumber
        # accomplishment words, which seems at odds with the "high ratio"
        # wording below -- confirm the intended comparison.
        if 0 < effort_words <= accomplishment_words:
            report.set_summary(
                "This document has a high ratio of words suggesting " +
                f"effort ({effort_words}) to words suggesting " +
                f"concrete accomplishment ({accomplishment_words}).", )

        return report
Beispiel #13
0
def test_flag_requires_issue():
    """Constructing a Flag with a plain string as its issue must raise ValueError."""
    expect_value_error = pytest.raises(ValueError)
    with expect_value_error:
        Flag(100, 200, "Invalid issue")
Beispiel #14
0
from genderbias.detector import Report, Issue, Flag

from pytest import fixture

# Shared test data: the name given to the Report under test, a sample summary
# string, and a flag covering characters 0-10 whose issue reuses the report
# name with description "A" and fix "B".
report_name = "Text Analyzer"
summary = "[summary]"
flag = Flag(0, 10, Issue(report_name, "A", "B"))


@fixture
def report():
    """Build an empty Report named ``report_name`` for use by the tests."""
    empty_report = Report(report_name)
    return empty_report


def test_report_str_no_flags(report):
    """An empty report renders as its name plus a placeholder summary line."""
    expected = "\n".join([report_name, " SUMMARY: [None available]"])
    assert str(report) == expected


def test_report_str_with_one_flag(report):
    """A report with one flag renders the flag between name and summary lines."""
    report.add_flag(flag)
    expected_lines = [
        report_name,
        " [0-10]: " + report_name + ": A (B)",
        " SUMMARY: [None available]",
    ]
    assert str(report) == "\n".join(expected_lines)


def test_report_str_no_flags_with_summary(report):
    """A summary set on the report replaces the placeholder summary text."""
    report.set_summary(summary)
    expected = "\n".join([report_name, " SUMMARY: " + summary])
    assert str(report) == expected

Beispiel #15
0
from genderbias.detector import Report, Issue, Flag, BiasBoundsException

from pytest import fixture, raises

# Shared test data: the report name, a sample summary, one flag built with the
# default bias and one built with an explicitly positive bias.
report_name = "Text Analyzer"
summary = "[summary]"
flag = Flag(0, 10, Issue(report_name, "A", "B"))
positive_flag = Flag(20, 30, Issue(report_name, "C", "D", bias = Issue.positive_result))

# Expected text fragments of str(report): the placeholder summary line and
# the rendered line for `flag`.
no_summary_text = " SUMMARY: [None available]"
flag_text = " [0-10]: " + report_name + ": A (B)"

# NOTE(review): presumably the expected dict / tuple forms of an empty report
# and of the two flags above; they are not used in the visible tests --
# confirm against the tests that consume them.
base_dict = {'name': report_name, 'summary': "", 'flags': []}
positive_flag_tuple = (20, 30, report_name, "C", "D", +1.0)
negative_flag_tuple = (0, 10, report_name, "A", "B", -1.0)

@fixture
def report():
    """Build an empty Report named ``report_name`` for use by the tests."""
    empty_report = Report(report_name)
    return empty_report


def test_report_str_no_flags(report):
    """An empty report renders as its name plus the placeholder summary line."""
    expected = report_name + "\n" + no_summary_text
    assert str(report) == expected

def test_report_str_with_one_flag(report):
    """A report with one flag renders the flag between name and summary lines."""
    report.add_flag(flag)
    expected = report_name + "\n" + flag_text + "\n" + no_summary_text
    assert str(report) == expected

def test_report_str_no_flags_with_summary(report):
    """A summary set on the report replaces the placeholder summary text."""
    report.set_summary(summary)
    expected = report_name + "\n" + " SUMMARY: " + summary
    assert str(report) == expected