def get_report(self, doc):
    """
    Generate a report on the text based upon effort vs accomplishment.

    Every word found in EFFORT_WORDS is flagged in the report. Words found
    in ACCOMPLISHMENT_WORDS are collected only so they can be counted; they
    are not added to the report. A summary is set when there are NO words
    about accomplishment, or when the ratio of effort words to
    accomplishment words is higher than 1.2.

    Arguments:
        doc (Document): The document to check

    Returns:
        Report

    """
    report = Report("Effort vs Accomplishment")

    effort_flags = []
    accomplishment_flags = []

    for word, start, stop in doc.words_with_indices():
        lowered = word.lower()
        if lowered in EFFORT_WORDS:
            effort_flags.append(
                Flag(
                    start, stop,
                    Issue(
                        "Effort vs Accomplishment",
                        "The word '{word}' tends to speak about effort more than accomplishment."
                        .format(word=word),
                        "Try replacing with phrasing that emphasizes accomplishment.",
                        bias=Issue.negative_result)))
        # Not an elif: a word may appear in both word lists.
        if lowered in ACCOMPLISHMENT_WORDS:
            accomplishment_flags.append(
                Flag(
                    start, stop,
                    Issue(
                        "Effort vs Accomplishment",
                        "The word '{word}' tends to speak about accomplishment more than effort."
                        .format(word=word),
                        bias=Issue.positive_result)))

    # Only the effort flags are reported; accomplishment flags are counted.
    for flag in effort_flags:
        report.add_flag(flag)

    # Compare by value/truthiness rather than `is 0`: identity of ints is an
    # implementation detail. Checking for "no accomplishment words" first
    # also guards the division below against divide-by-zero.
    if not accomplishment_flags:
        report.set_summary(
            "This document has too few words about concrete accomplishment."
        )
    elif len(effort_flags) / len(accomplishment_flags) > 1.2:  # TODO: Arbitrary!
        report.set_summary(
            "This document has a high ratio ({}:{}) of words suggesting effort to words suggesting concrete accomplishment."
            .format(len(effort_flags), len(accomplishment_flags)))

    return report
def test_flag_stringify():
    """Check str(Flag) for an issue with a fix, without a fix, and name-only."""
    # Each case: (positional Issue args, keyword Issue args, expected string).
    cases = (
        (("Issue", "Description."), {"fix": "Fix Me!"},
         "[100-200]: Issue: Description. (Fix Me!)"),
        (("Issue", "Description."), {},
         "[100-200]: Issue: Description."),
        (("Issue",), {},
         "[100-200]: Issue"),
    )
    for issue_args, issue_kwargs, expected in cases:
        flagged = Flag(100, 200, Issue(*issue_args, **issue_kwargs))
        assert str(flagged) == expected
def get_report(self, doc):
    """
    Generate a report on the text based upon effort vs accomplishment.

    Every word found in EFFORT_WORDS is flagged in the report. Words found
    in ACCOMPLISHMENT_WORDS are collected only so they can be counted; they
    are not added to the report. A summary is set when there are NO words
    about accomplishment, or when the ratio of effort words to
    accomplishment words is higher than 1.2.

    Arguments:
        doc (Document): The document to check

    Returns:
        Report

    """
    report = Report("\nTerms focusing on effort vs accomplishment")

    effort_flags = []
    accomplishment_flags = []

    for word, start, stop in doc.words_with_indices():
        lowered = word.lower()
        if lowered in EFFORT_WORDS:
            effort_flags.append(
                Flag(
                    start, stop,
                    Issue("{word}".format(word=word),
                          bias=Issue.negative_result)))
        # Not an elif: a word may appear in both word lists.
        if lowered in ACCOMPLISHMENT_WORDS:
            accomplishment_flags.append(
                Flag(
                    start, stop,
                    Issue(
                        "'{word}' describes accomplishment more than effort"
                        .format(word=word),
                        bias=Issue.positive_result)))

    # Only the effort flags are reported; accomplishment flags are counted.
    for flag in effort_flags:
        report.add_flag(flag)

    # Compare by value/truthiness rather than `is 0`: identity of ints is an
    # implementation detail. Checking for "no accomplishment words" first
    # also guards the division below against divide-by-zero.
    if not accomplishment_flags:
        report.set_summary(
            "Too few words about concrete accomplishments")
    elif len(effort_flags) / len(accomplishment_flags) > 1.2:  # TODO: Arbitrary!
        # The original message read "ratherthan" (missing space).
        report.set_summary(
            "High ratio ({}:{}) of words suggesting effort rather than accomplishment."
            .format(len(effort_flags), len(accomplishment_flags)))

    return report
def get_report(self, doc):
    """
    Report words that match an entry of NATIVE_WORDS.Used.

    Each word of the document is lower-cased and searched (re.search)
    against every entry of NATIVE_WORDS.Used. For every hit a replacement
    suggestion, built from the 'Recommend1' and 'Recommend2' entries of
    NATIVE_WORDS for the matched text, is printed and the word is flagged.
    A summary is set if at least one hit was found.

    Arguments:
        doc (Document): The document to check

    Returns:
        Report

    """
    nativespeaker_report = Report(
        "\nTerms biased towards native speakers:")

    any_hit = False
    for token, start, stop in doc.words_with_indices():
        lowered = token.lower()
        for pattern in NATIVE_WORDS.Used:
            hit = re.search(pattern, lowered)
            if not hit:
                continue
            any_hit = True
            matched = hit.group()

            # Second suggestion is optional; 'none' marks its absence.
            second_choice = NATIVE_WORDS['Recommend2'].loc[
                NATIVE_WORDS['Used'] == matched].item()
            if second_choice == 'none':
                recommend2 = ""
            else:
                recommend2 = " or '" + second_choice + "'"

            first_choice = NATIVE_WORDS['Recommend1'].loc[
                NATIVE_WORDS['Used'] == matched].item()
            print(
                "Consider replacing '", matched, "' with '",
                first_choice, "'", recommend2)

            # The flag carries the lower-cased word, not the matched text.
            nativespeaker_report.add_flag(
                Flag(start, stop, Issue(lowered)))

    if any_hit:
        nativespeaker_report.set_summary(
            "To encourage non-native speakers, use short words and simple sentences"
        )
    return nativespeaker_report
def get_flags(self, doc: 'Document') -> List['Flag']:
    """
    Flag a document (globally) if it mentions too few research products.

    Returns a list holding a single flag when the estimated publication
    count is below ``self.min_publications``, and an empty list otherwise.

    """
    # TODO: Any other flags needed here?

    # The probabilities of all identified publications are summed and used
    # as a proxy for the number of publications mentioned. It is a bit
    # janky: e.g. two potential publications with a probability of 50% each
    # are treated like a mention of one single publication.
    pub_count = sum(identify_publications(doc).values())

    if pub_count < self.min_publications:
        too_few_publications = Issue(
            "Publications",
            "This document does not mention many publications.",
            "Try referencing more concrete publications or work "
            "byproducts, if possible.",
        )
        # The flag is document-wide, so it spans 0-0.
        return [Flag(0, 0, too_few_publications)]

    return []
def get_report(self, doc):
    """
    Report the usage of unnecessarily gendered words.

    Every word whose lower-cased form is in GENDERED_WORDS is flagged with
    the word itself as the issue name. A summary with replacement advice is
    set if at least one such word was found.

    Arguments:
        doc (Document): The document to check

    Returns:
        Report

    """
    report = Report("\nUnnecessary use of gender terms")

    # Collect the gendered words first, then flag them in document order.
    gendered_hits = [
        (word, start, stop)
        for word, start, stop in doc.words_with_indices()
        if word.lower() in GENDERED_WORDS
    ]
    for word, start, stop in gendered_hits:
        report.add_flag(Flag(start, stop, Issue("{}".format(word))))

    if gendered_hits:
        report.set_summary(
            "Replace gender terms with 'person' or 'individual', or a position-specific term, such as 'doctor' or 'author'"
        )
    return report
def get_report(self, doc):
    """
    Report the usage of unnecessarily gendered words.

    Every word whose lower-cased form is in GENDERED_WORDS is flagged with
    a negative-bias issue that suggests a neutral replacement.

    Arguments:
        doc (Document): The document to check

    Returns:
        Report

    """
    report = Report("Unnecessarily Gendered Words")

    for word, start, stop in doc.words_with_indices():
        if word.lower() not in GENDERED_WORDS:
            continue
        # NOTE(review): the description below misspells "unnecessarily";
        # it is kept as-is so the emitted text does not change.
        gendered_issue = Issue(
            "Unnecessarily Gendered Words",
            "The word '{word}' is unneccesarily gendered.".format(word=word),
            "Replace this term with 'person' or 'individual', or a position-specific phrase like 'doctor' or 'author'.",
            bias=Issue.negative_result,
        )
        report.add_flag(Flag(start, stop, gendered_issue))

    return report
def get_report(self, doc):
    """
    Generate a report on mentions of personal-life-related words.

    Every word whose lower-cased form is in PERSONAL_LIFE_TERMS is flagged
    together with a suggestion to speak about professional life instead.

    Arguments:
        doc (Document): The document to check

    Returns:
        Report

    """
    report = Report("Personal Life")

    for word, start, stop in doc.words_with_indices():
        is_personal = word.lower() in PERSONAL_LIFE_TERMS
        if is_personal:
            description = (
                "The word {word} tends to relate to personal life."
                .format(word=word))
            personal_issue = Issue(
                "Personal Life",
                description,
                "Try replacing with a sentiment about professional life.",
            )
            report.add_flag(Flag(start, stop, personal_issue))

    return report
def get_report(self, doc):
    """
    Generate a report that checks the text for curbed superlatives.

    These are phrases like "the best woman for the job" or "best of all
    women" that are clear 'hedged' superlatives. Every match of every
    pattern in CONDITIONAL_SUPERLATIVE_REGEXES is flagged over the span of
    the match.

    Arguments:
        doc (Document): The document to check

    Returns:
        Report

    """
    report = Report("Conditional Superlatives")
    full_text = doc.text()

    for pattern in CONDITIONAL_SUPERLATIVE_REGEXES:
        for hit in re.finditer(pattern, full_text):
            begin, end = hit.span()
            hedged_superlative = Issue(
                "Conditional Superlative",
                "This phrase appears to hedge a superlative to apply only to women.",
            )
            report.add_flag(Flag(begin, end, hedged_superlative))

    return report
def get_report(self, doc):
    """
    Generate a report on mentions of personal-life-related words.

    Every word whose lower-cased form is in PERSONAL_LIFE_TERMS is flagged
    with the word itself as the issue name. A summary is set if at least
    one such word was found.

    Arguments:
        doc (Document): The document to check

    Returns:
        Report

    """
    report = Report("\nTerms about personal life")

    flagged_count = 0
    for word, start, stop in doc.words_with_indices():
        if word.lower() not in PERSONAL_LIFE_TERMS:
            continue
        flagged_count += 1
        report.add_flag(Flag(start, stop, Issue("{}".format(word))))

    if flagged_count:
        report.set_summary('Found words relating to personal life')
    return report
def get_report(self, doc):
    """
    Report words that start with an entry of MALE_WORDS.

    Each word of the document is lower-cased and tested against every
    entry of MALE_WORDS: the entry must match at the start of the word and
    be followed by at least two more characters. One flag is added per
    matching entry, so a word can be flagged more than once. A summary is
    set if at least one match was found.

    Arguments:
        doc (Document): The document to check

    Returns:
        Report

    """
    male_report = Report("\nTerms biased towards men:")

    any_hit = False
    for token, start, stop in doc.words_with_indices():
        lowered = token.lower()
        for maleword in MALE_WORDS:
            # "^" anchors the entry to the start of the word; ".." requires
            # two further characters after it.
            if not re.search("^" + maleword + "..", lowered):
                continue
            any_hit = True
            male_report.add_flag(
                Flag(start, stop, Issue("{}".format(lowered))))

    if any_hit:
        male_report.set_summary(
            "Depending on context, these words may be biased towards recruiting men"
        )
    return male_report
def get_report(self, doc: Document):
    """
    Generate a report on the text based upon effort vs accomplishment.

    Two passes are made over the document:

    1. Word level: every word found in EFFORT_WORDS or ACCOMPLISHMENT_WORDS
       is flagged at half of the full bias strength.
    2. Sentence level: the text is parsed with ``nlp``; for each adjectival
       complement ("acomp") whose head has a pronoun / proper-noun child
       that is not in _PRONOUNS_TO_IGNORE, the enclosing sentence is
       flagged (once per distinct flag) and the effort / accomplishment
       counters are updated.

    A summary is set when at least one effort word was counted in the
    second pass and the effort count does not exceed the accomplishment
    count.

    Arguments:
        doc (Document): The document to check

    Returns:
        Report

    """
    report = Report("Effort vs Accomplishment")

    # Keep track of accomplishment- or effort-specific words:
    accomplishment_words = 0
    effort_words = 0

    # Keep track of flags (we'll deduplicate them before reporting)
    flags = set()

    for word, start, stop in doc.words_with_indices():
        lowered = word.lower()
        if lowered in EFFORT_WORDS:
            report.add_flag(
                Flag(
                    start,
                    stop,
                    Issue(
                        "Effort vs Accomplishment",
                        f"The word '{word}' tends to speak more about "
                        "effort than concrete accomplishment.",
                        # lower negative bias because this may be spurious.
                        # Specifically, the presence of these words doesn't
                        # mean that it's being attributed to the subject of
                        # the letter.
                        bias=Issue.negative_result * 0.5,
                        fix="Speak about concrete achievement rather "
                        "than abstract effort.",
                    ),
                ))
        # Not an elif: a word may appear in both word lists.
        if lowered in ACCOMPLISHMENT_WORDS:
            report.add_flag(
                Flag(
                    start,
                    stop,
                    Issue(
                        "Effort vs Accomplishment",
                        f"The word '{word}' illustrates concrete accomplishment.",
                        # lower positive valence because this may be spurious.
                        # Specifically, the presence of these words doesn't
                        # mean that it's being attributed to the subject of
                        # the letter.
                        bias=Issue.positive_result * 0.5,
                    ),
                ))

    # Parse into a separate name instead of rebinding the `doc` parameter.
    parsed = nlp(doc.text())

    # Loop over tokens to find adjectives to flag:
    for token in parsed:
        # Only tokens whose dependency tag is adjectival complement:
        if token.dep_ != "acomp":
            continue
        # Look through the children of the complement's head for the
        # likely subject that this adjectival complement refers to:
        for reference_token in token.head.children:
            # Only pronouns / proper nouns that are not ignored count.
            if reference_token.pos_ not in ["PRON", "PROPN"]:
                continue
            if reference_token.text in _PRONOUNS_TO_IGNORE:
                continue

            # If accomplishment-flavored, add positive flag.
            if token.text in ACCOMPLISHMENT_WORDS:
                accomplishment_words += 1
                warning = (f"The word '{token.text}' refers to "
                           "explicit accomplishment rather than effort.")
                suggestion = ""
                bias = Issue.positive_result
            # If effort-flavored, add negative flag.
            elif token.text in EFFORT_WORDS:
                effort_words += 1
                warning = (f"The word '{token.text}' tends to speak "
                           "about effort more than accomplishment.")
                suggestion = ("Try replacing with phrasing that "
                              "emphasizes accomplishment.")
                bias = Issue.negative_result
            else:
                continue

            # The flag covers the whole sentence; the set deduplicates
            # identical flags.
            flags.add((
                token.sent.start_char,
                token.sent.end_char,
                warning,
                suggestion,
                bias,
            ))

    # Sorted so the flags are reported in a stable order; iterating the
    # set directly gives an order that can differ between runs.
    for (start, stop, warning, suggestion, bias) in sorted(flags):
        # Add a flag to the report:
        report.add_flag(
            Flag(
                start,
                stop,
                Issue("Effort vs Accomplishment",
                      warning,
                      suggestion,
                      bias=bias),
            ))

    # NOTE(review): this fires when effort <= accomplishment, although the
    # message speaks of a HIGH effort ratio -- the comparison looks
    # inverted; confirm the intended condition before changing it.
    if 0 < effort_words <= accomplishment_words:
        # The second count previously repeated effort_words by mistake.
        report.set_summary(
            "This document has a high ratio of words suggesting "
            f"effort ({effort_words}) to words suggesting "
            f"concrete accomplishment ({accomplishment_words}).",
        )

    return report
from genderbias.detector import Report, Issue, Flag
from pytest import fixture

# Shared test data.
report_name = "Text Analyzer"
summary = "[summary]"
flag = Flag(0, 10, Issue(report_name, "A", "B"))


@fixture
def report():
    """Provide a fresh Report named ``report_name`` for each test."""
    return Report(report_name)


def test_report_str_no_flags(report):
    """A report without flags or summary prints its name and a placeholder."""
    expected = "\n".join([report_name, " SUMMARY: [None available]"])
    assert str(report) == expected


def test_report_str_with_one_flag(report):
    """A single flag is printed between the name and the summary line."""
    report.add_flag(flag)
    expected_lines = [
        report_name,
        " [0-10]: " + report_name + ": A (B)",
        " SUMMARY: [None available]",
    ]
    assert str(report) == "\n".join(expected_lines)


def test_report_str_no_flags_with_summary(report):
    """A summary that has been set replaces the placeholder text."""
    report.set_summary(summary)
    expected = "\n".join([report_name, " SUMMARY: " + summary])
    assert str(report) == expected
def test_issue_bias_bounds():
    """Creating an Issue just outside either bias bound must raise."""
    epsilon = 0.0000001

    # Slightly above the positive bound.
    with raises(BiasBoundsException):
        Issue("", bias=Issue.positive_result + epsilon)

    # Slightly below the negative bound.
    with raises(BiasBoundsException):
        Issue("", bias=Issue.negative_result - epsilon)
from genderbias.detector import Report, Issue, Flag, BiasBoundsException
from pytest import fixture, raises

# Shared test data.
report_name = "Text Analyzer"
summary = "[summary]"

flag = Flag(0, 10, Issue(report_name, "A", "B"))
positive_flag = Flag(
    20, 30, Issue(report_name, "C", "D", bias=Issue.positive_result))

# Expected fragments of str(report).
no_summary_text = " SUMMARY: [None available]"
flag_text = " [0-10]: {}: A (B)".format(report_name)

base_dict = dict(name=report_name, summary="", flags=[])
positive_flag_tuple = (20, 30, report_name, "C", "D", +1.0)
negative_flag_tuple = (0, 10, report_name, "A", "B", -1.0)


@fixture
def report():
    """Provide a fresh Report named ``report_name`` for each test."""
    return Report(report_name)


def test_report_str_no_flags(report):
    """A report without flags or summary prints its name and a placeholder."""
    expected = report_name + "\n" + no_summary_text
    assert str(report) == expected


def test_report_str_with_one_flag(report):
    """A single flag is printed between the name and the summary line."""
    report.add_flag(flag)
    expected = report_name + "\n" + flag_text + "\n" + no_summary_text
    assert str(report) == expected


def test_report_str_no_flags_with_summary(report):
    """A summary that has been set replaces the placeholder text."""
    report.set_summary(summary)
    expected = report_name + "\n" + " SUMMARY: " + summary
    assert str(report) == expected