def test_log():
    """Exercise ``modifiers.log`` applied to the constant 5.

    Asserts that the modifier solves to ``math_log(5)``, that a pickle
    round-trip of the modifier solves to the same value, and that its
    ``repr`` is ``"<feature.log(5)>"``.
    """
    feature = modifiers.log(5)

    assert solve(feature) == math_log(5)

    # A pickled-and-restored copy has to solve to the same value.
    restored = pickle.loads(pickle.dumps(feature))
    assert solve(restored) == math_log(5)

    assert repr(feature) == "<feature.log(5)>"
def test_trim():
    """Check what ``trim`` yields for a feature, a list and a nested modifier."""
    source = Datasource("derp1")
    plain = Feature("foobar1", returns=int)
    sourced = Feature("foobar2", returns=int, depends_on=[source])
    five = Constant(value=5)
    vector = FeatureVector("foobar3", returns=int, depends_on=[five])

    # A lone feature is expected back as a one-element sequence.
    trimmed_single = list(trim(plain))
    assert trimmed_single == [plain]

    # A list of features is expected back unchanged and in the same order.
    trimmed_many = list(trim([plain, sourced, vector]))
    assert trimmed_many == [plain, sourced, vector]

    # A modifier expression is expected to reduce to the features it wraps.
    nested = log(max(plain - sourced, 1))
    assert list(trim(nested)) == [plain, sourced]
from revscoring.features import Feature, wikitext from revscoring.features.modifiers import div, log, max, sub def _process_new_longest(p_longest, r_longest): if r_longest > p_longest: return r_longest else: return 1 parent = [ log(wikitext.revision.parent.chars + 1), log(wikitext.revision.parent.tokens + 1), log(wikitext.revision.parent.words + 1), log(wikitext.revision.parent.uppercase_words + 1), log(wikitext.revision.parent.headings + 1), log(wikitext.revision.parent.wikilinks + 1), log(wikitext.revision.parent.external_links + 1), log(wikitext.revision.parent.templates + 1), log(wikitext.revision.parent.ref_tags + 1), div(wikitext.revision.parent.chars, max(wikitext.revision.parent.words, 1), name="revision.parent.chars_per_word"), div(wikitext.revision.parent.words, max(wikitext.revision.parent.tokens, 1), name="revision.parent.words_per_token"), div(wikitext.revision.parent.uppercase_words, max(wikitext.revision.parent.words, 1), name="revision.parent.uppercase_words_per_word"), div(wikitext.revision.parent.markups,
from revscoring.features import (
    diff, page, parent_revision, previous_user_revision, revision, user
)
# `max` has to be the revscoring modifier.  Without this import the builtin
# `max` is called immediately, at import time, on a feature expression and the
# int 1, instead of building a max feature that is evaluated later.
# NOTE(review): confirm `max` is exported by revscoring.features.modifiers in
# the revscoring version this package pins.
from revscoring.features.modifiers import log, max

from . import generic

# Features for the "damaging" model: everything in `generic.damaging` followed
# by badword and misspelling features.  The order of entries is preserved
# because consumers may rely on feature position.
damaging = generic.damaging + [
    log(max(diff.added_badwords_ratio + 1, 1)),
    log(max(diff.added_misspellings_ratio + 1, 1)),
    log(max(diff.badwords_added + 1, 1)),
    log(max(diff.badwords_removed + 1, 1)),
    log(max(diff.misspellings_added + 1, 1)),
    log(max(diff.misspellings_removed + 1, 1)),
    log(max(diff.proportion_of_badwords_added + 1, 1)),
    log(max(diff.proportion_of_badwords_removed + 1, 1)),
    log(max(diff.proportion_of_misspellings_added + 1, 1)),
    log(max(diff.proportion_of_misspellings_removed + 1, 1)),
    log(max(diff.removed_badwords_ratio + 1, 1)),
    log(max(diff.removed_misspellings_ratio + 1, 1)),
    log(max(parent_revision.badwords + 1, 1)),
    log(max(parent_revision.misspellings + 1, 1)),
    log(max(parent_revision.proportion_of_badwords + 1, 1)),
    log(max(parent_revision.proportion_of_misspellings + 1, 1)),
    log(max(revision.badwords + 1, 1)),
    log(max(revision.misspellings + 1, 1)),
    log(max(revision.proportion_of_badwords + 1, 1)),
    log(max(revision.proportion_of_misspellings + 1, 1)),
    log(revision.infonoise + 1),
]
from revscoring.features import diff, page, parent_revision, user
from revscoring.features.modifiers import log

# Features built only from `diff`, `page`, `parent_revision` and `user`.
# Count-like values are wrapped as log(x + 1); proportions, ratios and flags
# are listed as-is.  Entry order is kept exactly as it was.
no_lang_damaging = [
    log(diff.added_symbolic_chars_ratio + 1),
    log(diff.chars_added + 1),
    log(diff.chars_removed + 1),
    diff.longest_repeated_char_added,
    diff.longest_token_added,
    log(diff.markup_chars_added + 1),
    log(diff.markup_chars_removed + 1),
    log(diff.numeric_chars_added + 1),
    log(diff.numeric_chars_removed + 1),
    diff.proportion_of_chars_added,
    diff.proportion_of_chars_removed,
    diff.proportion_of_markup_chars_added,
    diff.proportion_of_numeric_chars_added,
    diff.proportion_of_symbolic_chars_added,
    diff.proportion_of_uppercase_chars_added,
    log(diff.segments_added + 1),
    log(diff.segments_removed + 1),
    log(diff.symbolic_chars_added + 1),
    log(diff.symbolic_chars_removed + 1),
    log(diff.uppercase_chars_added + 1),
    log(diff.uppercase_chars_removed + 1),
    diff.bytes_changed + 1,
    diff.bytes_changed_ratio,
    page.is_content_namespace,
    parent_revision.was_same_user,
    user.is_bot,
]
category_links / max(wikitext.revision.content_chars, 1), cite_templates, cite_templates / max(wikitext.revision.content_chars, 1), proportion_of_templated_references, non_templated_references, non_templated_references / max(wikitext.revision.content_chars, 1), non_cite_templates, non_cite_templates / max(wikitext.revision.content_chars, 1), infobox_templates, cn_templates + 1, cn_templates / max(wikitext.revision.content_chars, 1), who_templates + 1, who_templates / max(wikitext.revision.content_chars, 1), main_article_templates, main_article_templates / max(wikitext.revision.content_chars, 1), (english.stemmed.revision.stem_chars / max(wikitext.revision.content_chars, 1)), log(paragraphs_without_refs_total_length + 1), ] wp10 = wikipedia.article + local_wiki """ Based largely on work by Morten Warncke-Wang et al.[1] and with a few improvements and extensions that Morten identified after publication. 1. Warncke-Wang, M., Cosley, D., & Riedl, J. (2013, August). Tell me more: An actionable quality model for wikipedia. In Proceedings of the 9th International Symposium on Open Collaboration (p. 8). ACM. http://opensym.org/wsos2013/proceedings/p0202-warncke.pdf """
max(french.diff.words_removed, 1) proportion_of_misspellings_added = french.diff.misspellings_added / \ max(french.diff.words_added, 1) proportion_of_misspellings_removed = french.diff.misspellings_removed / \ max(french.diff.words_removed, 1) proportion_of_badwords = french.parent_revision.badwords / \ max(french.parent_revision.words, 1) proportion_of_misspellings = french.parent_revision.misspellings / \ max(french.parent_revision.words, 1) added_badwords_ratio = proportion_of_badwords_added / \ max(proportion_of_badwords, 0.01) added_misspellings_ratio = proportion_of_misspellings_added / \ max(proportion_of_misspellings, 0.01) damaging = util.no_lang_damaging + enwiki.badwords + enwiki.informals + [ log(french.diff.badwords_added + 1), log(french.diff.badwords_removed + 1), log(french.diff.misspellings_added + 1), log(french.diff.misspellings_removed + 1), proportion_of_badwords_added, proportion_of_badwords_removed, proportion_of_misspellings_added, proportion_of_misspellings_removed, added_badwords_ratio, added_misspellings_ratio ] goodfaith = damaging
from revscoring.features import wikibase
from revscoring.features.modifiers import log

# Features taken from `wikibase.revision`, each wrapped as log(x + 1).
# Entry order is kept exactly as it was.
item = [
    log(wikibase.revision.claims + 1),
    log(wikibase.revision.properties + 1),
    log(wikibase.revision.aliases + 1),
    log(wikibase.revision.sources + 1),
    log(wikibase.revision.qualifiers + 1),
    log(wikibase.revision.badges + 1),
    log(wikibase.revision.labels + 1),
    log(wikibase.revision.sitelinks + 1),
    log(wikibase.revision.descriptions + 1),
]
proportion_of_informals_removed = estonian.diff.informals_removed / max(estonian.diff.words_removed, 1) proportion_of_badwords = estonian.parent_revision.badwords / max(estonian.parent_revision.words, 1) proportion_of_misspellings = estonian.parent_revision.misspellings / max(estonian.parent_revision.words, 1) proportion_of_informals = estonian.parent_revision.informals / max(estonian.parent_revision.words, 1) added_badwords_ratio = proportion_of_badwords_added / max(proportion_of_badwords, 0.01) added_misspellings_ratio = proportion_of_misspellings_added / max(proportion_of_misspellings, 0.01) added_informals_ratio = proportion_of_informals_added / max(proportion_of_informals, 0.01) damaging = ( util.no_lang_damaging + enwiki.badwords + enwiki.informals + [ log(estonian.diff.badwords_added + 1), log(estonian.diff.badwords_removed + 1), log(estonian.diff.informals_added + 1), log(estonian.diff.informals_removed + 1), log(estonian.diff.misspellings_added + 1), log(estonian.diff.misspellings_removed + 1), proportion_of_badwords_added, proportion_of_badwords_removed, proportion_of_informals_added, proportion_of_informals_removed, proportion_of_misspellings_added, proportion_of_misspellings_removed, added_badwords_ratio, added_informals_ratio, added_misspellings_ratio, ]
# log(revscoring.features.diff.numeric_chars_added + 1), # log(revscoring.features.diff.numeric_chars_removed + 1), # revscoring.features.diff.proportion_of_chars_added, # revscoring.features.diff.proportion_of_chars_removed, # revscoring.features.diff.proportion_of_numeric_chars_added, # revscoring.features.diff.proportion_of_symbolic_chars_added, # revscoring.features.diff.proportion_of_uppercase_chars_added, # log(revscoring.features.diff.symbolic_chars_added + 1), # log(revscoring.features.diff.symbolic_chars_removed + 1), # log(revscoring.features.diff.uppercase_chars_added + 1), # log(revscoring.features.diff.uppercase_chars_removed + 1), # revscoring.features.diff.bytes_changed + 1, # revscoring.featuresdiff.bytes_changed_ratio, # page.is_content_namespace, # parent_revision.was_same_user, log(user.age + 1), diff.number_added_sitelinks, diff.number_removed_sitelinks, diff.number_changed_sitelinks, diff.number_added_labels, diff.number_removed_labels, diff.number_changed_labels, diff.number_added_descriptions, diff.number_removed_descriptions, diff.number_changed_descriptions, diff.number_added_aliases, diff.number_removed_aliases, diff.number_added_claims, diff.number_removed_claims, diff.number_changed_claims, diff.number_changed_identifiers,
revision_oriented.revision.user.in_group({'bot'}, name="revision.user.is_bot"), revision_oriented.revision.user.in_group( {'checkuser', 'bureaucrat', 'oversight', 'steward'}, name="revision.user.has_advanced_rights"), revision_oriented.revision.user.in_group({'sysop'}, name="revision.user.is_admin"), revision_oriented.revision.user.in_group( { 'browsearchive', 'deletedhistory', 'interface-editor', 'noratelimit', 'accountcreator', 'massmessage-sender', 'templateeditor', 'autopatrolled', 'propertycreator', 'centralnoticeadmin' }, name="revision.user.is_trusted"), revision_oriented.revision.user.in_group( { 'rollback', 'abusefilter', 'patroller', 'reviewer', 'autoreview', 'autoreviewer', 'editor', 'autoeditor', 'eliminator' }, name="revision.user.is_patroller"), revision_oriented.revision.user.in_group( {'import', 'filemover', 'suppressredirect'}, name="revision.user.is_curator") ] protected_user = [ revision_oriented.revision.user.is_anon, log(temporal.revision.user.seconds_since_registration + 1) ]
] user_rights = [ revision_oriented.revision.user.in_group( {'bot'}, name="revision.user.is_bot"), revision_oriented.revision.user.in_group( {'checkuser', 'bureaucrat', 'oversight', 'steward'}, name="revision.user.has_advanced_rights"), revision_oriented.revision.user.in_group( {'sysop'}, name="revision.user.is_admin"), revision_oriented.revision.user.in_group( {'browsearchive', 'deletedhistory', 'interface-editor', 'noratelimit', 'accountcreator', 'massmessage-sender', 'templateeditor', 'autopatrolled', 'propertycreator', 'centralnoticeadmin'}, name="revision.user.is_trusted"), revision_oriented.revision.user.in_group( {'rollback', 'abusefilter', 'patroller', 'reviewer', 'autoreview', 'autoreviewer', 'editor', 'autoeditor', 'eliminator'}, name="revision.user.is_patroller"), revision_oriented.revision.user.in_group( {'import', 'filemover', 'suppressredirect'}, name="revision.user.is_curator") ] protected_user = [ revision_oriented.revision.user.is_anon, log(temporal.revision.user.seconds_since_registration + 1) ]
# Parent-revision counts divided by the parent's word count; the denominator
# is wrapped in max(..., 1).
# NOTE(review): `max` and `log` are imported outside this excerpt --
# presumably the revscoring modifiers; confirm.
proportion_of_badwords = (
    hebrew.parent_revision.badwords
    / max(hebrew.parent_revision.words, 1)
)
proportion_of_misspellings = (
    hebrew.parent_revision.misspellings
    / max(hebrew.parent_revision.words, 1)
)
proportion_of_informals = (
    hebrew.parent_revision.informals
    / max(hebrew.parent_revision.words, 1)
)

# Proportion added by the edit relative to the parent's proportion; the
# denominator is wrapped in max(..., 0.01).
added_badwords_ratio = (
    proportion_of_badwords_added / max(proportion_of_badwords, 0.01)
)
added_misspellings_ratio = (
    proportion_of_misspellings_added / max(proportion_of_misspellings, 0.01)
)
added_informals_ratio = (
    proportion_of_informals_added / max(proportion_of_informals, 0.01)
)

# Shared lists first, then the Hebrew features.  Entry order is kept exactly
# as it was.
damaging = util.no_lang_damaging + enwiki.badwords + enwiki.informals + [
    log(hebrew.diff.badwords_added + 1),
    log(hebrew.diff.badwords_removed + 1),
    log(hebrew.diff.informals_added + 1),
    log(hebrew.diff.informals_removed + 1),
    log(hebrew.diff.misspellings_added + 1),
    log(hebrew.diff.misspellings_removed + 1),
    proportion_of_badwords_added,
    proportion_of_badwords_removed,
    proportion_of_informals_added,
    proportion_of_informals_removed,
    proportion_of_misspellings_added,
    proportion_of_misspellings_removed,
    added_badwords_ratio,
    added_informals_ratio,
    added_misspellings_ratio,
]
from revscoring.features import Feature, wikitext from revscoring.features.modifiers import div, log, max, sub def _process_new_longest(p_longest, r_longest): if r_longest > p_longest: return r_longest else: return 1 parent = [ log(wikitext.revision.parent.chars + 1), log(wikitext.revision.parent.tokens + 1), log(wikitext.revision.parent.words + 1), log(wikitext.revision.parent.uppercase_words + 1), log(wikitext.revision.parent.headings + 1), log(wikitext.revision.parent.wikilinks + 1), log(wikitext.revision.parent.external_links + 1), log(wikitext.revision.parent.templates + 1), log(wikitext.revision.parent.ref_tags + 1), div(wikitext.revision.parent.chars, max(wikitext.revision.parent.words, 1), name="revision.parent.chars_per_word"), div(wikitext.revision.parent.words, max(wikitext.revision.parent.tokens, 1), name="revision.parent.words_per_token"), div(wikitext.revision.parent.uppercase_words, max(wikitext.revision.parent.words, 1), name="revision.parent.uppercase_words_per_word"), div(wikitext.revision.parent.markups, max(wikitext.revision.parent.tokens, 1),
"glwiki.revision.revision", wikitext.revision.datasources, ) paragraphs = mappers.map( str, revision.paragraphs_sentences_and_whitespace, name="glwiki.revision.paragraphs" ) paragraphs_without_refs = filters.regex_matching( r"^(?!\s*$)((?!<ref>)(.|\n))*$", paragraphs, name="glwiki.revision.paragraphs_without_refs" ) paragraphs_without_refs_total_length = aggregators.sum( mappers.map(len, paragraphs_without_refs), name="glwiki.revision.paragraphs_without_refs_total_length" ) local_wiki = [ image_links, image_links / max(wikitext.revision.content_chars, 1), category_links, category_links / max(wikitext.revision.content_chars, 1), cn_templates + 1, cn_templates / max(wikitext.revision.content_chars, 1), log(paragraphs_without_refs_total_length + 1), paragraphs_without_refs_total_length / max(wikitext.revision.content_chars, 1), ] wp10 = wikipedia.article + local_wiki
# Parent-revision counts divided by the parent's word count; the denominator
# is wrapped in max(..., 1).
# NOTE(review): `max` and `log` are imported outside this excerpt --
# presumably the revscoring modifiers; confirm.
proportion_of_badwords = (
    indonesian.parent_revision.badwords
    / max(indonesian.parent_revision.words, 1)
)
proportion_of_misspellings = (
    indonesian.parent_revision.misspellings
    / max(indonesian.parent_revision.words, 1)
)
proportion_of_informals = (
    indonesian.parent_revision.informals
    / max(indonesian.parent_revision.words, 1)
)

# Proportion added by the edit relative to the parent's proportion; the
# denominator is wrapped in max(..., 0.01).
added_badwords_ratio = (
    proportion_of_badwords_added / max(proportion_of_badwords, 0.01)
)
added_misspellings_ratio = (
    proportion_of_misspellings_added / max(proportion_of_misspellings, 0.01)
)
added_informals_ratio = (
    proportion_of_informals_added / max(proportion_of_informals, 0.01)
)

# Shared lists first, then the Indonesian features.  Entry order is kept
# exactly as it was.
damaging = util.no_lang_damaging + enwiki.badwords + enwiki.informals + [
    log(indonesian.diff.badwords_added + 1),
    log(indonesian.diff.badwords_removed + 1),
    log(indonesian.diff.informals_added + 1),
    log(indonesian.diff.informals_removed + 1),
    log(indonesian.diff.misspellings_added + 1),
    log(indonesian.diff.misspellings_removed + 1),
    proportion_of_badwords_added,
    proportion_of_badwords_removed,
    proportion_of_informals_added,
    proportion_of_informals_removed,
    proportion_of_misspellings_added,
    proportion_of_misspellings_removed,
    added_badwords_ratio,
    added_informals_ratio,
    added_misspellings_ratio,
]
r"Cn", r"Fact" ] cn_templates = templates_that_match("|".join(CN_TEMPLATES), name="enwiki.revision.cn_templates") who_templates = templates_that_match("Who", name="enwiki.revision.cn_templates") main_article_templates = templates_that_match( "Main", name="enwiki.main_article_templates") wp10 = [ revision.category_links, log(revision.content_chars + 1), log(revision.image_links + 1), revision.image_links / max(revision.content_chars, 1), log(cite_templates + 1), log((revision.templates - cite_templates) + 1), infobox_templates, english.revision.infonoise, log(revision.internal_links + 1), revision.internal_links / max(revision.content_chars, 1), revision.level_2_headings, revision.level_2_headings / max(revision.content_chars, 1), revision.level_3_headings, revision.level_3_headings / max(revision.content_chars, 1), log(revision.ref_tags + 1), revision.ref_tags / max(revision.content_chars, 1), log(max((revision.ref_tags - cite_templates) + 1, 1)),
max(english.parent_revision.words, 1) proportion_of_misspellings = english.parent_revision.misspellings / \ max(english.parent_revision.words, 1) proportion_of_informals = english.parent_revision.informals / \ max(english.parent_revision.words, 1) added_badwords_ratio = proportion_of_badwords_added / \ max(proportion_of_badwords, 0.01) added_misspellings_ratio = proportion_of_misspellings_added / \ max(proportion_of_misspellings, 0.01) added_informals_ratio = proportion_of_informals_added / \ max(proportion_of_informals, 0.01) badwords = [ added_badwords_ratio, log(english.diff.badwords_added + 1), log(english.diff.badwords_removed + 1), proportion_of_badwords_added, proportion_of_badwords_removed, ] informals = [ added_informals_ratio, log(english.diff.informals_added + 1), log(english.diff.informals_removed + 1), proportion_of_informals_added, proportion_of_informals_removed ] damaging = util.no_lang_damaging + [ log(english.diff.words_added + 1),
max(turkish.diff.words_removed, 1) proportion_of_informals_added = turkish.diff.informals_added / \ max(turkish.diff.words_added, 1) proportion_of_informals_removed = turkish.diff.informals_removed / \ max(turkish.diff.words_removed, 1) proportion_of_badwords = turkish.parent_revision.badwords / \ max(turkish.parent_revision.words, 1) proportion_of_informals = turkish.parent_revision.informals / \ max(turkish.parent_revision.words, 1) added_badwords_ratio = proportion_of_badwords_added / \ max(proportion_of_badwords, 0.01) added_informals_ratio = proportion_of_informals_added / \ max(proportion_of_informals, 0.01) damaging = util.no_lang_damaging + enwiki.badwords + enwiki.informals + [ log(turkish.diff.badwords_added + 1), log(turkish.diff.badwords_removed + 1), log(turkish.diff.informals_added + 1), log(turkish.diff.informals_removed + 1), proportion_of_badwords_added, proportion_of_badwords_removed, proportion_of_informals_added, proportion_of_informals_removed, added_badwords_ratio, added_informals_ratio ] goodfaith = damaging
proportion_of_badwords = italian.parent_revision.badwords / \ max(italian.parent_revision.words, 1) proportion_of_misspellings = italian.parent_revision.misspellings / \ max(italian.parent_revision.words, 1) proportion_of_informals = italian.parent_revision.informals / \ max(italian.parent_revision.words, 1) added_badwords_ratio = proportion_of_badwords_added / \ max(proportion_of_badwords, 0.01) added_misspellings_ratio = proportion_of_misspellings_added / \ max(proportion_of_misspellings, 0.01) added_informals_ratio = proportion_of_informals_added / \ max(proportion_of_informals, 0.01) damaging = util.no_lang_damaging + english.badwords + [ log(italian.diff.words_added + 1), log(italian.diff.words_removed + 1), log(italian.parent_revision.words + 1), log(italian.diff.badwords_added + 1), log(italian.diff.badwords_removed + 1), log(italian.diff.informals_added + 1), log(italian.diff.informals_removed + 1), log(italian.diff.misspellings_added + 1), log(italian.diff.misspellings_removed + 1), proportion_of_badwords_added, proportion_of_badwords_removed, proportion_of_informals_added, proportion_of_informals_removed, proportion_of_misspellings_added, proportion_of_misspellings_removed, added_badwords_ratio,
from revscoring.features import (
    diff, page, parent_revision, previous_user_revision, revision, user
)
from revscoring.features.modifiers import log

from . import generic

# Badword features appended to both feature lists below, each wrapped as
# log(x + 1).  Entry order is kept exactly as it was.
_badwords = [
    log(diff.added_badwords_ratio + 1),
    log(diff.badwords_added + 1),
    log(diff.badwords_removed + 1),
    log(diff.proportion_of_badwords_added + 1),
    log(diff.proportion_of_badwords_removed + 1),
    log(diff.removed_badwords_ratio + 1),
    log(parent_revision.badwords + 1),
    log(parent_revision.proportion_of_badwords + 1),
    log(revision.badwords + 1),
    log(revision.proportion_of_badwords + 1),
]

# Each list is the matching `generic` list followed by the badword features.
damaging = generic.damaging + _badwords
good_faith = generic.good_faith + _badwords
all_images, all_images / max(wikitext.revision.content_chars, 1), category_links, category_links / max(wikitext.revision.content_chars, 1), all_ref_tags, all_ref_tags / max(wikitext.revision.content_chars, 1), all_cite_templates, all_cite_templates / max(wikitext.revision.content_chars, 1), proportion_of_templated_references, non_templated_references, non_templated_references / max(wikitext.revision.content_chars, 1), non_cite_templates, non_cite_templates / max(wikitext.revision.content_chars, 1), infobox_templates, cn_templates + 1, cn_templates / max(wikitext.revision.content_chars, 1), who_templates + 1, who_templates / max(wikitext.revision.content_chars, 1), main_article_templates, main_article_templates / max(wikitext.revision.content_chars, 1), (english.stemmed.revision.stem_chars / max(wikitext.revision.content_chars, 1)), log(paragraphs_without_refs_total_length + 1), words_to_watch_count, words_to_watch_count / max(wikitext.revision.words, 1), idioms_count, idioms_count / max(wikitext.revision.words, 1), words_to_watch_count + idioms_count, (words_to_watch_count + idioms_count) / max(wikitext.revision.words, 1) ] wp10 = wikipedia.article + local_wiki """ Based largely on work by Morten Warncke-Wang et al.[1] and with a few improvements and extensions that Morten identified after publication. 1. Warncke-Wang, M., Cosley, D., & Riedl, J. (2013, August). Tell me more: An actionable quality model for wikipedia. In Proceedings of the 9th International Symposium on Open Collaboration (p. 8). ACM. http://opensym.org/wsos2013/proceedings/p0202-warncke.pdf
# Parent-revision counts divided by the parent's word count; the denominator
# is wrapped in max(..., 1).
# NOTE(review): `max` and `log` are imported outside this excerpt --
# presumably the revscoring modifiers; confirm.
proportion_of_badwords = (
    portuguese.parent_revision.badwords
    / max(portuguese.parent_revision.words, 1)
)
proportion_of_misspellings = (
    portuguese.parent_revision.misspellings
    / max(portuguese.parent_revision.words, 1)
)
proportion_of_informals = (
    portuguese.parent_revision.informals
    / max(portuguese.parent_revision.words, 1)
)

# Proportion added by the edit relative to the parent's proportion; the
# denominator is wrapped in max(..., 0.01).
added_badwords_ratio = (
    proportion_of_badwords_added / max(proportion_of_badwords, 0.01)
)
added_misspellings_ratio = (
    proportion_of_misspellings_added / max(proportion_of_misspellings, 0.01)
)
added_informals_ratio = (
    proportion_of_informals_added / max(proportion_of_informals, 0.01)
)

# Shared lists first, then the Portuguese features.  Entry order is kept
# exactly as it was.
damaging = util.no_lang_damaging + enwiki.badwords + enwiki.informals + [
    log(portuguese.diff.badwords_added + 1),
    log(portuguese.diff.badwords_removed + 1),
    log(portuguese.diff.informals_added + 1),
    log(portuguese.diff.informals_removed + 1),
    log(portuguese.diff.misspellings_added + 1),
    log(portuguese.diff.misspellings_removed + 1),
    proportion_of_badwords_added,
    proportion_of_badwords_removed,
    proportion_of_informals_added,
    proportion_of_informals_removed,
    proportion_of_misspellings_added,
    proportion_of_misspellings_removed,
    added_badwords_ratio,
    added_informals_ratio,
    added_misspellings_ratio,
]
proportion_of_informals_removed = spanish.diff.informals_removed / max(spanish.diff.words_removed, 1) proportion_of_badwords = spanish.parent_revision.badwords / max(spanish.parent_revision.words, 1) proportion_of_misspellings = spanish.parent_revision.misspellings / max(spanish.parent_revision.words, 1) proportion_of_informals = spanish.parent_revision.informals / max(spanish.parent_revision.words, 1) added_badwords_ratio = proportion_of_badwords_added / max(proportion_of_badwords, 0.01) added_misspellings_ratio = proportion_of_misspellings_added / max(proportion_of_misspellings, 0.01) added_informals_ratio = proportion_of_informals_added / max(proportion_of_informals, 0.01) damaging = ( util.no_lang_damaging + enwiki.badwords + enwiki.informals + [ log(spanish.diff.badwords_added + 1), log(spanish.diff.badwords_removed + 1), log(spanish.diff.informals_added + 1), log(spanish.diff.informals_removed + 1), log(spanish.diff.misspellings_added + 1), log(spanish.diff.misspellings_removed + 1), proportion_of_badwords_added, proportion_of_badwords_removed, proportion_of_informals_added, proportion_of_informals_removed, proportion_of_misspellings_added, proportion_of_misspellings_removed, added_badwords_ratio, added_informals_ratio, added_misspellings_ratio, ]
# Parent-revision counts divided by the parent's word count; the denominator
# is wrapped in max(..., 1).
# NOTE(review): `max` and `log` are imported outside this excerpt --
# presumably the revscoring modifiers; confirm.
proportion_of_badwords = (
    vietnamese.parent_revision.badwords
    / max(vietnamese.parent_revision.words, 1)
)
proportion_of_misspellings = (
    vietnamese.parent_revision.misspellings
    / max(vietnamese.parent_revision.words, 1)
)
proportion_of_informals = (
    vietnamese.parent_revision.informals
    / max(vietnamese.parent_revision.words, 1)
)

# Proportion added by the edit relative to the parent's proportion; the
# denominator is wrapped in max(..., 0.01).
added_badwords_ratio = (
    proportion_of_badwords_added / max(proportion_of_badwords, 0.01)
)
added_misspellings_ratio = (
    proportion_of_misspellings_added / max(proportion_of_misspellings, 0.01)
)
added_informals_ratio = (
    proportion_of_informals_added / max(proportion_of_informals, 0.01)
)

# Shared lists first, then the Vietnamese features.  Entry order is kept
# exactly as it was.
damaging = util.no_lang_damaging + enwiki.badwords + enwiki.informals + [
    log(vietnamese.diff.badwords_added + 1),
    log(vietnamese.diff.badwords_removed + 1),
    log(vietnamese.diff.informals_added + 1),
    log(vietnamese.diff.informals_removed + 1),
    log(vietnamese.diff.misspellings_added + 1),
    log(vietnamese.diff.misspellings_removed + 1),
    proportion_of_badwords_added,
    proportion_of_badwords_removed,
    proportion_of_informals_added,
    proportion_of_informals_removed,
    proportion_of_misspellings_added,
    proportion_of_misspellings_removed,
    added_badwords_ratio,
    added_informals_ratio,
    added_misspellings_ratio,
]