def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)

    # Per-token-class counts: each feature is the length of the matching
    # token-list datasource for this revision.
    self.tokens = aggregators.len(self.datasources.tokens)
    "`int` : The number of tokens in the revision"

    self.numbers = aggregators.len(self.datasources.numbers)
    "`int` : The number of number tokens in the revision"

    self.whitespaces = aggregators.len(self.datasources.whitespaces)
    "`int` : The number of whitespace tokens in the revision"

    self.markups = aggregators.len(self.datasources.markups)
    "`int` : The number of markup tokens in the revision"

    self.cjks = aggregators.len(self.datasources.cjks)
    "`int` : The number of Chinese/Japanese/Korean tokens in the revision"

    self.entities = aggregators.len(self.datasources.entities)
    "`int` : The number of HTML entity tokens in the revision"

    self.urls = aggregators.len(self.datasources.urls)
    "`int` : The number of URL tokens in the revision"

    self.words = aggregators.len(self.datasources.words)
    "`int` : The number of word tokens in the revision"

    self.uppercase_words = aggregators.len(
        self.datasources.uppercase_words)
    "`int` : The number of UPPERCASE word tokens in the revision"

    self.punctuations = aggregators.len(self.datasources.punctuations)
    "`int` : The number of punctuation tokens in the revision"

    self.breaks = aggregators.len(self.datasources.breaks)
    "`int` : The number of break tokens in the revision"

    # Maximum token lengths, computed by mapping len() over the token lists.
    self.longest_token = aggregators.max(
        mappers.map(len, self.datasources.tokens), returns=int)
    "`int` : The longest single token in the revision"

    self.longest_word = aggregators.max(
        mappers.map(len, self.datasources.words), returns=int)
    "`int` : The longest single word-token in the revision"
cite_templates, name="enwiki.revision.non_cite_templates") # Links category_links = wikitext.revision.wikilink_titles_matching( r"Category\:", name="enwiki.revision.category_links") image_links = wikitext.revision.wikilink_titles_matching( r"File|Image\:", name="enwiki.revision.image_links") # References revision = Revision( "enwiki.revision.revision", wikitext.revision.datasources, ) paragraphs = mappers.map(str, revision.paragraphs_sentences_and_whitespace, name="enwiki.revision.paragraphs") paragraphs_without_refs = filters.regex_matching( r"^(?!\s*$)((?!<ref>)(.|\n))*$", paragraphs, name="enwiki.revision.paragraphs_without_refs") paragraphs_without_refs_total_length = aggregators.sum( mappers.map(len, paragraphs_without_refs), name="enwiki.revision.paragraphs_without_refs_total_length") local_wiki = [ image_links, image_links / max(wikitext.revision.content_chars, 1), category_links, category_links / max(wikitext.revision.content_chars, 1), cite_templates,
def __init__(self, name, revision_datasources):
    super().__init__(name, revision_datasources)

    self.chars = aggregators.len(
        self.datasources.text, name=self._name + ".chars")
    "`int` : The number of characters in the text"

    # Character counts per token class: sum the lengths of the tokens in
    # each token-list datasource.
    self.numeric_chars = aggregators.sum(
        mappers.map(len, self.datasources.numbers),
        name=self._name + ".numeric_chars", returns=int)
    "`int` : The number of numeric characters in the text"

    self.whitespace_chars = aggregators.sum(
        mappers.map(len, self.datasources.whitespaces),
        name=self._name + ".whitespace_chars", returns=int)
    "`int` : The number of whitespace characters in the text"

    self.markup_chars = aggregators.sum(
        mappers.map(len, self.datasources.markups),
        name=self._name + ".markup_chars", returns=int)
    "`int` : The number of wikitext markup characters in the text"

    self.cjk_chars = aggregators.sum(
        mappers.map(len, self.datasources.cjks),
        name=self._name + ".cjk_chars", returns=int)
    "`int` : The number of Chinese/Japanese/Korean characters in the text"

    self.entity_chars = aggregators.sum(
        mappers.map(len, self.datasources.entities),
        name=self._name + ".entity_chars", returns=int)
    "`int` : The number of HTML entity characters in the text"

    self.url_chars = aggregators.sum(
        mappers.map(len, self.datasources.urls),
        name=self._name + ".url_chars", returns=int)
    "`int` : The number of URL characters in the text"

    self.word_chars = aggregators.sum(
        mappers.map(len, self.datasources.words),
        name=self._name + ".word_chars", returns=int)
    "`int` : The number of word characters in the text"

    self.uppercase_word_chars = aggregators.sum(
        mappers.map(len, self.datasources.uppercase_words),
        name=self._name + ".uppercase_word_chars", returns=int)
    "`int` : The number of UPPERCASE WORD characters in the text"

    self.punctuation_chars = aggregators.sum(
        mappers.map(len, self.datasources.punctuations),
        name=self._name + ".punctuation_chars", returns=int)
    "`int` : The number of punctuation characters in the text"

    self.break_chars = aggregators.sum(
        mappers.map(len, self.datasources.breaks),
        name=self._name + ".break_chars", returns=int)
    "`int` : The number of break characters in the text"

    # Derived from the raw text rather than from a token list.
    self.longest_repeated_char = Feature(
        self._name + ".longest_repeated_char",
        _process_longest_repeated_char,
        returns=int, depends_on=[self.datasources.text])
    "`int` : The most repeated character"
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)

    # Character-level diff statistics: each *_added/*_removed pair sums
    # the lengths of the items in the corresponding diff datasource.
    self.chars_added = aggregators.sum(
        mappers.map(len, self.datasources.segments_added),
        name=self._name + ".chars_added", returns=int)
    "`int` : The number of characters added"

    self.chars_removed = aggregators.sum(
        mappers.map(len, self.datasources.segments_removed),
        name=self._name + ".chars_removed", returns=int)
    "`int` : The number of characters removed"

    self.numeric_chars_added = aggregators.sum(
        mappers.map(len, self.datasources.numbers_added),
        name=self._name + ".numeric_chars_added", returns=int)
    "`int` : The number of numeric characters added"

    self.numeric_chars_removed = aggregators.sum(
        mappers.map(len, self.datasources.numbers_removed),
        name=self._name + ".numeric_chars_removed", returns=int)
    "`int` : The number of numeric characters removed"

    self.whitespace_chars_added = aggregators.sum(
        mappers.map(len, self.datasources.whitespaces_added),
        name=self._name + ".whitespace_chars_added", returns=int)
    "`int` : The number of whitespace characters added"

    self.whitespace_chars_removed = aggregators.sum(
        mappers.map(len, self.datasources.whitespaces_removed),
        name=self._name + ".whitespace_chars_removed", returns=int)
    "`int` : The number of whitespace characters removed"

    self.markup_chars_added = aggregators.sum(
        mappers.map(len, self.datasources.markups_added),
        name=self._name + ".markup_chars_added", returns=int)
    "`int` : The number of markup characters added"

    self.markup_chars_removed = aggregators.sum(
        mappers.map(len, self.datasources.markups_removed),
        name=self._name + ".markup_chars_removed", returns=int)
    "`int` : The number of markup characters removed"

    self.cjk_chars_added = aggregators.sum(
        mappers.map(len, self.datasources.cjks_added),
        name=self._name + ".cjk_chars_added", returns=int)
    "`int` : The number of cjk characters added"

    self.cjk_chars_removed = aggregators.sum(
        mappers.map(len, self.datasources.cjks_removed),
        name=self._name + ".cjk_chars_removed", returns=int)
    "`int` : The number of cjk characters removed"

    self.entity_chars_added = aggregators.sum(
        mappers.map(len, self.datasources.entities_added),
        name=self._name + ".entity_chars_added", returns=int)
    "`int` : The number of entity characters added"

    self.entity_chars_removed = aggregators.sum(
        mappers.map(len, self.datasources.entities_removed),
        name=self._name + ".entity_chars_removed", returns=int)
    "`int` : The number of entity characters removed"

    self.url_chars_added = aggregators.sum(
        mappers.map(len, self.datasources.urls_added),
        name=self._name + ".url_chars_added", returns=int)
    "`int` : The number of url characters added"

    self.url_chars_removed = aggregators.sum(
        mappers.map(len, self.datasources.urls_removed),
        name=self._name + ".url_chars_removed", returns=int)
    "`int` : The number of url characters removed"

    self.word_chars_added = aggregators.sum(
        mappers.map(len, self.datasources.words_added),
        name=self._name + ".word_chars_added", returns=int)
    "`int` : The number of word characters added"

    self.word_chars_removed = aggregators.sum(
        mappers.map(len, self.datasources.words_removed),
        name=self._name + ".word_chars_removed", returns=int)
    "`int` : The number of word characters removed"

    self.uppercase_word_chars_added = aggregators.sum(
        mappers.map(len, self.datasources.uppercase_words_added),
        name=self._name + ".uppercase_word_chars_added", returns=int)
    "`int` : The number of UPPERCASE word characters added"

    self.uppercase_word_chars_removed = aggregators.sum(
        mappers.map(len, self.datasources.uppercase_words_removed),
        name=self._name + ".uppercase_word_chars_removed", returns=int)
    "`int` : The number of UPPERCASE word characters removed"

    self.punctuation_chars_added = aggregators.sum(
        mappers.map(len, self.datasources.punctuations_added),
        name=self._name + ".punctuation_chars_added", returns=int)
    "`int` : The number of punctuation characters added"

    self.punctuation_chars_removed = aggregators.sum(
        mappers.map(len, self.datasources.punctuations_removed),
        name=self._name + ".punctuation_chars_removed", returns=int)
    "`int` : The number of punctuation characters removed"

    self.break_chars_added = aggregators.sum(
        mappers.map(len, self.datasources.breaks_added),
        name=self._name + ".break_chars_added", returns=int)
    "`int` : The number of break characters added"

    self.break_chars_removed = aggregators.sum(
        mappers.map(len, self.datasources.breaks_removed),
        name=self._name + ".break_chars_removed", returns=int)
    "`int` : The number of break characters removed"

    # Derived from the raw added segments rather than from a token list.
    self.longest_repeated_char_added = Feature(
        self._name + ".longest_repeated_char_added",
        _process_longest_repeated_char_added,
        returns=int, depends_on=[self.datasources.segments_added])
    "`int` : The most repeated character added"
def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.segments_added = aggregators.len( self.datasources.segments_added, name=self._name + ".segments_added" ) "`int` : The number of segments added " self.segments_removed = aggregators.len( self.datasources.segments_removed, name=self._name + ".segments_removed" ) "`int` : The number of segments removed " self.tokens_added = aggregators.len( self.datasources.tokens_added, name=self._name + ".tokens_added" ) "`int` : The number of tokens added " self.tokens_removed = aggregators.len( self.datasources.tokens_removed, name=self._name + ".tokens_removed" ) "`int` : The number of tokens removed " self.numbers_added = aggregators.len( self.datasources.numbers_added, name=self._name + ".numbers_added" ) "`int` : The number of number tokens added " self.numbers_removed = aggregators.len( self.datasources.numbers_removed, name=self._name + ".numbers_removed" ) "`int` : The number of number tokens removed " self.markups_added = aggregators.len( self.datasources.markups_added, name=self._name + ".markups_added" ) "`int` : The number of markup tokens added " self.markups_removed = aggregators.len( self.datasources.markups_removed, name=self._name + ".markups_removed" ) "`int` : The number of markup tokens removed " self.whitespaces_added = aggregators.len( self.datasources.whitespaces_added, name=self._name + ".whitespaces_added" ) "`int` : The number of whitespace tokens added " self.whitespaces_removed = aggregators.len( self.datasources.whitespaces_removed, name=self._name + ".whitespaces_removed" ) "`int` : The number of whitespace tokens removed " self.cjks_added = aggregators.len( self.datasources.cjks_added, name=self._name + ".cjks_added" ) "`int` : The number of cjk tokens added " self.cjks_removed = aggregators.len( self.datasources.cjks_removed, name=self._name + ".cjks_removed" ) "`int` : The number of cjk tokens removed " self.entities_added = aggregators.len( self.datasources.entities_added, 
name=self._name + ".entities_added" ) "`int` : The number of entity tokens added " self.entities_removed = aggregators.len( self.datasources.entities_removed, name=self._name + ".entities_removed" ) "`int` : The number of entity tokens removed " self.urls_added = aggregators.len( self.datasources.urls_added, name=self._name + ".urls_added" ) "`int` : The number of url tokens added " self.urls_removed = aggregators.len( self.datasources.urls_removed, name=self._name + ".urls_removed" ) "`int` : The number of url tokens removed " self.words_added = aggregators.len( self.datasources.words_added, name=self._name + ".words_added" ) "`int` : The number of word tokens added " self.words_removed = aggregators.len( self.datasources.words_removed, name=self._name + ".words_removed" ) "`int` : The number of word tokens removed " self.uppercase_words_added = aggregators.len( self.datasources.uppercase_words_added, name=self._name + ".uppercase_words_added" ) "`int` : The number of word tokens added " self.uppercase_words_removed = aggregators.len( self.datasources.uppercase_words_removed, name=self._name + ".uppercase_words_removed" ) "`int` : The number of word tokens removed " self.punctuations_added = aggregators.len( self.datasources.punctuations_added, name=self._name + ".punctuations_added" ) "`int` : The number of punctuation tokens added " self.punctuations_removed = aggregators.len( self.datasources.punctuations_removed, name=self._name + ".punctuations_removed" ) "`int` : The number of punctuation tokens removed " self.breaks_added = aggregators.len( self.datasources.breaks_added, name=self._name + ".breaks_added" ) "`int` : The number of break tokens added " self.breaks_removed = aggregators.len( self.datasources.breaks_removed, name=self._name + ".breaks_removed" ) "`int` : The number of break tokens removed" self.longest_token_added = aggregators.max( mappers.map(len, self.datasources.tokens_added), name=self._name + '.longest_token_added' ) "`int` : The length of 
the longest token added" self.longest_uppercase_word_added = aggregators.max( mappers.map(len, self.datasources.uppercase_words_added) ) """
import pickle

from revscoring.datasources.datasource import Datasource
from revscoring.datasources.meta import mappers
from revscoring.dependencies import solve

# Bare datasources used as inputs for the mapper fixtures below.
tokens = Datasource("tokens")
my_ints = Datasource("my_ints")


def extract_first_char(token):
    # Slicing (rather than indexing) keeps this safe for empty tokens.
    return token[:1]


first_char = mappers.map(extract_first_char, tokens, name="first_char")
lower_case_tokens = mappers.lower_case(tokens, name="lower_case_tokens")
derepeat_tokens = mappers.derepeat(tokens, name="derepeat_tokens")
de1337_tokens = mappers.de1337(tokens, name="de1337_tokens")
abs_ints = mappers.abs(my_ints)


def test_item_mapper():
    """An item mapper applies its function element-wise and pickles cleanly."""
    cache = {tokens: ["alpha", "bravo", "charlie", "delta"]}
    assert solve(first_char, cache=cache) == ["a", "b", "c", "d"]
    assert pickle.loads(pickle.dumps(first_char)) == first_char
returns=int) image_tags_str = wikitext.revision.datasources.tags_str_matching( r"<(gallery|imagemap)", name="ukwiki.revision.image_tags_str") images_in_tags = Feature("ukwiki.revision.images_in_tags", get_images, depends_on=[image_tags_str], returns=int) all_images = image_links + image_templates +\ images_in_templates + images_in_tags # References paragraphs = mappers.map( str, wikitext.revision.datasources.paragraphs_sentences_and_whitespace, name="ukwiki.revision.paragraphs" ) paragraphs_without_refs = filters.regex_matching( r"^(?!\s*$)((?!<ref>)(.|\n))*$", paragraphs, name="ukwiki.revision.paragraphs_without_refs" ) paragraphs_without_refs_total_length = aggregators.sum( mappers.map(len, paragraphs_without_refs), name="ukwiki.revision.paragraphs_without_refs_total_length" ) local_wiki = [ all_images, all_images / max(wikitext.revision.content_chars, 1), category_links,
r"sen[ _]referencias|cómpre[ _]páxina|" + r"verificar[ _]credibilidade", name="glwiki.revision.cn_templates") # Links category_links = wikitext.revision.wikilink_titles_matching( r"(Categoría|Category)\:", name="glwiki.revision.category_links") image_links = wikitext.revision.wikilink_titles_matching( r"(File|Image|Ficheiro)\:", name="glwiki.revision.image_links") # References revision = Revision( "glwiki.revision.revision", wikitext.revision.datasources, ) paragraphs = mappers.map( str, revision.paragraphs_sentences_and_whitespace, name="glwiki.revision.paragraphs" ) paragraphs_without_refs = filters.regex_matching( r"^(?!\s*$)((?!<ref>)(.|\n))*$", paragraphs, name="glwiki.revision.paragraphs_without_refs" ) paragraphs_without_refs_total_length = aggregators.sum( mappers.map(len, paragraphs_without_refs), name="glwiki.revision.paragraphs_without_refs_total_length" ) local_wiki = [ image_links, image_links / max(wikitext.revision.content_chars, 1), category_links,
def __init__(self, name, revision_datasources): super().__init__(name, revision_datasources) self.wikicode = Datasource(self._name + ".wikicode", _process_wikicode, depends_on=[revision_datasources.text]) """ A :class:`mwparserfromhell.wikicode.Wikicode` abstract syntax tree representing the structure of the page. """ self.node_class_map = Datasource(self._name + ".node_class_map", _process_node_class_map, depends_on=[self.wikicode]) """ A map of mwparserfromhell.wikicode.<class> to lists of nodes of that type. """ self.content = execute_method("strip_code", self.wikicode, name=self._name + ".content") """ The viewable content (no markup or templates) of the revision. """ self.headings = get_key(mwparserfromhell.nodes.Heading, self.node_class_map, default=[], name=self._name + ".headings") """ A list of :class:`mwparserfromhell.nodes.heading.Heading`'s """ self.heading_titles = mappers.map(_extract_heading_title, self.headings, name=self._name + ".heading_titles") """ A list of heading titles """ self.external_links = get_key(mwparserfromhell.nodes.ExternalLink, self.node_class_map, default=[], name=self._name + ".external_links") """ A list of :class:`mwparserfromhell.nodes.heading.ExternalLink`'s """ self.external_link_urls = mappers.map(_extract_external_link_url, self.external_links, name=self._name + ".external_link_url") """ A list of external link urls """ self.wikilinks = get_key(mwparserfromhell.nodes.Wikilink, self.node_class_map, default=[], name=self._name + ".wikilinks") """ A list of :class:`mwparserfromhell.nodes.heading.Wikilink`'s """ self.wikilink_titles = mappers.map(_extract_wikilink_title, self.wikilinks, name=self._name + ".wikilink_titles") """ Returns a list of string titles of internal links (aka "targets") """ self.tags = get_key(mwparserfromhell.nodes.Tag, self.node_class_map, default=[], name=self._name + ".tags") """ A list of :class:`mwparserfromhell.nodes.heading.Tag`'s """ self.tag_names = mappers.map(_extract_tag_name, self.tags, 
name=self._name + ".tag_names") """ Returns a list of html tag names present in the content of the revision """ self.tags_str = mappers.map(str, self.tags, name=self._name + ".tags_str") """ Returns a list of tags present in the content of the revision as strings """ self.templates = get_key(mwparserfromhell.nodes.Template, self.node_class_map, default=[], name=self._name + ".templates") """ A list of :class:`mwparserfromhell.nodes.heading.Templates`'s """ self.template_names = mappers.map(_extract_template_name, self.templates, name=self._name + ".template_names") """ Returns a list of template names present in the content of the revision """ self.templates_str = mappers.map(str, self.templates, name=self._name + ".templates_str") """ Returns a list of templates present in the content of the revision as strings """ self.sections = Datasource(self._name + ".section", _extract_sections, depends_on=[self.wikicode]) """
def __init__(self, name, revision_datasources): super().__init__(name, revision_datasources) self.wikicode = Datasource( self._name + ".wikicode", _process_wikicode, depends_on=[revision_datasources.text] ) """ A :class:`mwparserfromhell.wikicode.Wikicode` abstract syntax tree representing the structure of the page. """ self.node_class_map = Datasource( self._name + ".node_class_map", _process_node_class_map, depends_on=[self.wikicode] ) """ A map of mwparserfromhell.wikicode.<class> to lists of nodes of that type. """ self.content = execute_method( "strip_code", self.wikicode, name=self._name + ".content" ) """ The viewable content (no markup or templates) of the revision. """ self.headings = get_key( mwparserfromhell.nodes.Heading, self.node_class_map, default=[], name=self._name + ".headings" ) """ A list of :class:`mwparserfromhell.nodes.heading.Heading`'s """ self.heading_titles = mappers.map( _extract_heading_title, self.headings, name=self._name + ".heading_titles" ) """ A list of heading titles """ self.external_links = get_key( mwparserfromhell.nodes.ExternalLink, self.node_class_map, default=[], name=self._name + ".external_links" ) """ A list of :class:`mwparserfromhell.nodes.heading.ExternalLink`'s """ self.external_link_urls = mappers.map( _extract_external_link_url, self.external_links, name=self._name + ".external_link_url" ) """ A list of external link urls """ self.wikilinks = get_key( mwparserfromhell.nodes.Wikilink, self.node_class_map, default=[], name=self._name + ".wikilinks" ) """ A list of :class:`mwparserfromhell.nodes.heading.Wikilink`'s """ self.wikilink_titles = mappers.map( _extract_wikilink_title, self.wikilinks, name=self._name + ".wikilink_titles" ) """ Returns a list of string titles of internal links (aka "targets") """ self.tags = get_key( mwparserfromhell.nodes.Tag, self.node_class_map, default=[], name=self._name + ".tags" ) """ A list of :class:`mwparserfromhell.nodes.heading.Tag`'s """ self.tag_names = mappers.map( 
_extract_tag_name, self.tags, name=self._name + ".tag_names" ) """ Returns a list of html tag names present in the content of the revision """ self.templates = get_key( mwparserfromhell.nodes.Template, self.node_class_map, default=[], name=self._name + ".templates" ) """ A list of :class:`mwparserfromhell.nodes.heading.Templates`'s """ self.template_names = mappers.map( _extract_template_name, self.templates, name=self._name + ".template_names" ) """
def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.segments_added = aggregators.len(self.datasources.segments_added, name=self._name + ".segments_added") "`int` : The number of segments added " self.segments_removed = aggregators.len( self.datasources.segments_removed, name=self._name + ".segments_removed") "`int` : The number of segments removed " self.tokens_added = aggregators.len(self.datasources.tokens_added, name=self._name + ".tokens_added") "`int` : The number of tokens added " self.tokens_removed = aggregators.len(self.datasources.tokens_removed, name=self._name + ".tokens_removed") "`int` : The number of tokens removed " self.numbers_added = aggregators.len(self.datasources.numbers_added, name=self._name + ".numbers_added") "`int` : The number of number tokens added " self.numbers_removed = aggregators.len( self.datasources.numbers_removed, name=self._name + ".numbers_removed") "`int` : The number of number tokens removed " self.markups_added = aggregators.len(self.datasources.markups_added, name=self._name + ".markups_added") "`int` : The number of markup tokens added " self.markups_removed = aggregators.len( self.datasources.markups_removed, name=self._name + ".markups_removed") "`int` : The number of markup tokens removed " self.whitespaces_added = aggregators.len( self.datasources.whitespaces_added, name=self._name + ".whitespaces_added") "`int` : The number of whitespace tokens added " self.whitespaces_removed = aggregators.len( self.datasources.whitespaces_removed, name=self._name + ".whitespaces_removed") "`int` : The number of whitespace tokens removed " self.cjks_added = aggregators.len(self.datasources.cjks_added, name=self._name + ".cjks_added") "`int` : The number of cjk tokens added " self.cjks_removed = aggregators.len(self.datasources.cjks_removed, name=self._name + ".cjks_removed") "`int` : The number of cjk tokens removed " self.entities_added = aggregators.len(self.datasources.entities_added, name=self._name + 
".entities_added") "`int` : The number of entity tokens added " self.entities_removed = aggregators.len( self.datasources.entities_removed, name=self._name + ".entities_removed") "`int` : The number of entity tokens removed " self.urls_added = aggregators.len(self.datasources.urls_added, name=self._name + ".urls_added") "`int` : The number of url tokens added " self.urls_removed = aggregators.len(self.datasources.urls_removed, name=self._name + ".urls_removed") "`int` : The number of url tokens removed " self.words_added = aggregators.len(self.datasources.words_added, name=self._name + ".words_added") "`int` : The number of word tokens added " self.words_removed = aggregators.len(self.datasources.words_removed, name=self._name + ".words_removed") "`int` : The number of word tokens removed " self.uppercase_words_added = aggregators.len( self.datasources.uppercase_words_added, name=self._name + ".uppercase_words_added") "`int` : The number of word tokens added " self.uppercase_words_removed = aggregators.len( self.datasources.uppercase_words_removed, name=self._name + ".uppercase_words_removed") "`int` : The number of word tokens removed " self.punctuations_added = aggregators.len( self.datasources.punctuations_added, name=self._name + ".punctuations_added") "`int` : The number of punctuation tokens added " self.punctuations_removed = aggregators.len( self.datasources.punctuations_removed, name=self._name + ".punctuations_removed") "`int` : The number of punctuation tokens removed " self.breaks_added = aggregators.len(self.datasources.breaks_added, name=self._name + ".breaks_added") "`int` : The number of break tokens added " self.breaks_removed = aggregators.len(self.datasources.breaks_removed, name=self._name + ".breaks_removed") "`int` : The number of break tokens removed" self.longest_token_added = aggregators.max( mappers.map(len, self.datasources.tokens_added), name=self._name + '.longest_token_added') "`int` : The length of the longest token added" 
self.longest_uppercase_word_added = aggregators.max( mappers.map(len, self.datasources.uppercase_words_added)) """
"Check to see if we have at least 10 words and no refs" words = 0 refs = 0 for t in segment.tokens(): words += t.type == "word" refs += t.type in ("ref_open", "ref_close", "ref_singleton") return words > 10 and refs == 0 paragraphs_without_refs = filters.filter( filter_paragraphs_without_ref_tags, wikitext.revision.datasources.paragraphs_sentences_and_whitespace, name="ptwiki.revision.paragraphs_without_refs") paragraphs_without_refs_total_length = aggregators.sum( mappers.map(len, mappers.map(str, paragraphs_without_refs)), name="ptwiki.revision.paragraphs_without_refs_total_length") # Wikipedia:Manual of style/Words to watch words_to_watch_count = portuguese.words_to_watch.revision.matches local_wiki = [ all_images, all_images / max(wikitext.revision.content_chars, 1), category_links, category_links / max(wikitext.revision.content_chars, 1), all_ref_tags, all_ref_tags / max(wikitext.revision.content_chars, 1), all_cite_templates, all_cite_templates / max(wikitext.revision.content_chars, 1), proportion_of_templated_references, non_templated_references, non_templated_references / max(wikitext.revision.content_chars, 1), non_cite_templates, non_cite_templates / max(wikitext.revision.content_chars, 1), infobox_templates,