Example #1
0
# Template features for English Wikipedia.  Each call passes a pattern for
# template names together with an explicit feature name.
infobox_templates = wikitext.revision.template_names_matching(
    r"infobox",
    name="enwiki.revision.infobox_templates",
)

# Patterns for "citation needed" style templates, OR-ed into one regex below.
CN_TEMPLATES = [
    r"Citation[_ ]needed",
    r"Cn",
    r"Fact",
]
cn_templates = wikitext.revision.template_names_matching(
    "|".join(CN_TEMPLATES),
    name="enwiki.revision.cn_templates",
)

who_templates = wikitext.revision.template_names_matching(
    "Who",
    name="enwiki.revision.who_templates",
)

# NOTE(review): unlike its siblings this feature name has no ".revision"
# segment.  Left as-is because the string is runtime data; confirm whether
# anything depends on the current spelling before changing it.
main_article_templates = wikitext.revision.template_names_matching(
    "Main",
    name="enwiki.main_article_templates",
)

cite_templates = wikitext.revision.template_names_matching(
    r"cite",
    name="enwiki.revision.cite_templates",
)

# The denominator is max(ref_tags, 1) rather than ref_tags itself.
proportion_of_templated_references = (
    cite_templates / max(wikitext.revision.ref_tags, 1)
)
# The difference is passed through max(..., 0).
non_templated_references = max(
    wikitext.revision.ref_tags - cite_templates, 0
)
non_cite_templates = sub(
    wikitext.revision.templates,
    cite_templates,
    name="enwiki.revision.non_cite_templates",
)

# Links
category_links = wikitext.revision.wikilink_titles_matching(
    r"Category\:", name="enwiki.revision.category_links")
# The alternation is grouped so that the trailing colon applies to both
# namespace prefixes.  Ungrouped, r"File|Image\:" parses as "File" OR
# "Image:", which lets the "File" branch match with no colon after it.
image_links = wikitext.revision.wikilink_titles_matching(
    r"(?:File|Image)\:", name="enwiki.revision.image_links")

# References
# Revision object constructed from a name string and the wikitext revision
# datasources.
revision = Revision("enwiki.revision.revision", wikitext.revision.datasources)
paragraphs = mappers.map(str,
                         revision.paragraphs_sentences_and_whitespace,
Example #2
0
    r"Harvcolnb",
    r"Harvard citations", r"harvs",
    r"Harvp",
    r"Citation"
]
# Citation templates: the CITE_TEMPLATES patterns are OR-ed into one regex.
cite_templates = wikitext.revision.template_names_matching(
    "|".join(CITE_TEMPLATES), name="ukwiki.revision.cite_templates")
shortened_footnote_templates = wikitext.revision.template_names_matching(
    "sfn", name="ukwiki.revision.shortened_footnote_templates")
# Shortened-footnote templates are added to both the reference total and the
# cite-template total.
all_ref_tags = shortened_footnote_templates + wikitext.revision.ref_tags
all_cite_templates = cite_templates + shortened_footnote_templates
# The denominator is max(all_ref_tags, 1) rather than all_ref_tags itself.
proportion_of_templated_references = \
    all_cite_templates / max(all_ref_tags, 1)
# Backward-compatible alias: this module originally exposed the ratio under
# the misspelled name "proportion_of_templates_references".  The spelling
# used by the other wikis' feature lists is "templated"; both names refer to
# the same feature object.
proportion_of_templates_references = proportion_of_templated_references
non_templated_references = max(all_ref_tags - all_cite_templates, 0)
non_cite_templates = sub(
        wikitext.revision.templates, all_cite_templates,
        name="ukwiki.revision.non_cite_templates"
)

# Links
# Spellings of the category namespace prefix; joined with "|" into a single
# pattern for the wikilink-title match below.
CATEGORY_LINKS = [r"Категорія", r"Category", r"Категория"]
category_links = wikitext.revision.wikilink_titles_matching(
    "|".join(CATEGORY_LINKS),
    name="ukwiki.revision.category_links",
)

IMAGE_LINKS = [
    r"File",
    r"Файл",
    r"Image",
Example #3
0
+++++++++++++++++
"""

from revscoring.features import wikitext
from revscoring.features.modifiers import log, max, sub

from . import wikipedia

# Citation templates
cite_templates = wikitext.revision.template_names_matching(
    r"Kaynak|.*[ _]kaynağı", name="trwiki.revision.cite_templates")
# The denominator is max(ref_tags, 1) rather than ref_tags itself.
proportion_of_templated_references = (
    cite_templates / max(wikitext.revision.ref_tags, 1)
)
# The difference is passed through max(..., 0).
non_templated_references = max(
    wikitext.revision.ref_tags - cite_templates,
    0,
)
non_cite_templates = sub(wikitext.revision.templates,
                         cite_templates,
                         name="trwiki.revision.non_cite_templates")

# Infobox templates
infobox_templates = wikitext.revision.template_names_matching(
    r".*[ _]bilgi[ _]kutusu", name="trwiki.revision.infobox_templates")

# Citation-needed templates.
# Copied (2015-10-29) from:
# https://fr.wikipedia.org/wiki/Wikip%C3%A9dia:Citez_vos_sources#R.C3.A9clamation_et_contestation_de_sources
# NOTE(review): the link above points at French Wikipedia while the pattern
# below is not French -- the provenance note looks carried over from another
# wiki's feature list; confirm or replace it.
cn_templates = wikitext.revision.template_names_matching(
    r"Kaynak[ _]belirt|Olgu|Fact|Delil",
    name="trwiki.revision.lvl1_cn_templates",
)

# "Main article" templates.
main_article_templates = wikitext.revision.template_names_matching(
    r"Ana|Anamadde", name="trwiki.main_article_templates")
Example #4
0
from revscoring.features import revision_oriented, wikitext as wikitext_features
from revscoring.features.modifiers import sub
from revscoring.languages import english

from . import mediawiki, wikipedia, wikitext

# Features specific to English Wikipedia.
local_wiki = [
    # Edit summary matches one of a few known words.
    revision_oriented.revision.comment_matches(
        r"copy|edit|npov|wp:?el",
        name="enwiki.revision.comment.has_known_word",
    ),
    # Edit summary matches the [[WP:AES|←]] link.
    revision_oriented.revision.comment_matches(
        r"\[\[WP:AES\|←\]\]",
        name="enwiki.revision.comment.is_aes",
    ),
    # "^cite" template match for this revision minus the same for its parent.
    sub(
        wikitext_features.revision.template_names_matching(r"^cite"),
        wikitext_features.revision.parent.template_names_matching(r"^cite"),
        name="enwiki.revision.diff.cite_templates_added",
    ),
]

# The six badword diff features, looked up by attribute name in the same
# order as a hand-written list: three delta features, then three
# proportional-delta features.
badwords = [
    getattr(english.badwords.revision.diff, attr_name)
    for attr_name in (
        "match_delta_sum",
        "match_delta_increase",
        "match_delta_decrease",
        "match_prop_delta_sum",
        "match_prop_delta_increase",
        "match_prop_delta_decrease",
    )
]

informals = [
    english.informals.revision.diff.match_delta_sum,
    english.informals.revision.diff.match_delta_increase,
    english.informals.revision.diff.match_delta_decrease,
    english.informals.revision.diff.match_prop_delta_sum,
Example #5
0
 wikitext.revision.diff.markup_prop_delta_increase,
 wikitext.revision.diff.markup_prop_delta_decrease,
 wikitext.revision.diff.number_delta_sum,
 wikitext.revision.diff.number_delta_increase,
 wikitext.revision.diff.number_delta_decrease,
 wikitext.revision.diff.number_prop_delta_sum,
 wikitext.revision.diff.number_prop_delta_increase,
 wikitext.revision.diff.number_prop_delta_decrease,
 wikitext.revision.diff.uppercase_word_delta_sum,
 wikitext.revision.diff.uppercase_word_delta_increase,
 wikitext.revision.diff.uppercase_word_delta_decrease,
 wikitext.revision.diff.uppercase_word_prop_delta_sum,
 wikitext.revision.diff.uppercase_word_prop_delta_increase,
 wikitext.revision.diff.uppercase_word_prop_delta_decrease,
 sub(wikitext.revision.chars,
     wikitext.revision.parent.chars,
     name="revision.diff.chars_change"),
 sub(wikitext.revision.tokens,
     wikitext.revision.parent.tokens,
     name="revision.diff.tokens_change"),
 sub(wikitext.revision.words,
     wikitext.revision.parent.words,
     name="revision.diff.words_change"),
 sub(wikitext.revision.markups,
     wikitext.revision.parent.markups,
     name="revision.diff.markups_change"),
 sub(wikitext.revision.headings,
     wikitext.revision.parent.headings,
     name="revision.diff.headings_change"),
 sub(wikitext.revision.external_links,
     wikitext.revision.parent.external_links,
Example #6
0
from revscoring.languages import english

from . import mediawiki, wikipedia, wikitext

# Features specific to English Wikipedia: two edit-summary matches and the
# difference in "^cite" template matches between a revision and its parent.
local_wiki = [
    revision_oriented.revision.comment_matches(
        r"copy|edit|npov|wp:?el",
        name="enwiki.revision.comment.has_known_word"),
    revision_oriented.revision.comment_matches(
        r"\[\[WP:AES\|←\]\]", name="enwiki.revision.comment.is_aes"),
    sub(wikitext_features.revision.template_names_matching(r"^cite"),
        wikitext_features.revision.parent.template_names_matching(r"^cite"),
        name="enwiki.revision.diff.cite_templates_added"),
]

# The six badword diff features.  Attribute names follow the shape
# match_<kind>_<aggregate>; iterating kind in the outer loop and aggregate in
# the inner loop yields delta sum/increase/decrease followed by prop_delta
# sum/increase/decrease.
badwords = [
    getattr(english.badwords.revision.diff, "match_" + kind + "_" + aggregate)
    for kind in ("delta", "prop_delta")
    for aggregate in ("sum", "increase", "decrease")
]

informals = [
    english.informals.revision.diff.match_delta_sum,
Example #7
0
from revscoring.features import wikitext as wikitext_features
from revscoring.features import revision_oriented
from revscoring.languages.features import RegexMatches
from revscoring.features.modifiers import sub
from revscoring.languages import english

from . import mediawiki, wikipedia, wikitext

# Wiki-specific features: edit-summary matches plus revision-minus-parent
# differences for deletion templates and deletion/candidate category links.
local_wiki = [
    revision_oriented.revision.comment_matches(
        r"^delet",
        name="fandom.revision.comment.delete_request",
    ),
    sub(
        wikitext_features.revision.template_names_matching(r"^delet"),
        wikitext_features.revision.parent.template_names_matching(r"^delet"),
        name="fandom.revision.diff.delete_added",
    ),
    sub(
        wikitext_features.revision.wikilink_titles_matching(
            r"^category:(delet|candidat)"
        ),
        wikitext_features.revision.parent.wikilink_titles_matching(
            r"^category:(delet|candidat)"
        ),
        name="fandom.revision.diff.delete_category_added",
    ),
    revision_oriented.revision.comment_matches(
        r"^redirected page to",
        name="fandom.revision.comment.likely_redirect",
    ),
]

# Redirect page
# The pattern lives in its own module-level name and is handed to
# RegexMatches as a one-element list.
redirect_regex = r"redirect"
redirects = RegexMatches(
    "fandom.likely_redirect",
    [redirect_regex],
)

badwords = [
    english.badwords.revision.diff.match_delta_sum,
    english.badwords.revision.diff.match_delta_increase,
    english.badwords.revision.diff.match_delta_decrease,
Example #8
0
 wikitext.revision.diff.markup_prop_delta_increase,
 wikitext.revision.diff.markup_prop_delta_decrease,
 wikitext.revision.diff.number_delta_sum,
 wikitext.revision.diff.number_delta_increase,
 wikitext.revision.diff.number_delta_decrease,
 wikitext.revision.diff.number_prop_delta_sum,
 wikitext.revision.diff.number_prop_delta_increase,
 wikitext.revision.diff.number_prop_delta_decrease,
 wikitext.revision.diff.uppercase_word_delta_sum,
 wikitext.revision.diff.uppercase_word_delta_increase,
 wikitext.revision.diff.uppercase_word_delta_decrease,
 wikitext.revision.diff.uppercase_word_prop_delta_sum,
 wikitext.revision.diff.uppercase_word_prop_delta_increase,
 wikitext.revision.diff.uppercase_word_prop_delta_decrease,
 sub(wikitext.revision.chars,
     wikitext.revision.parent.chars,
     name="revision.diff.chars_change"),
 sub(wikitext.revision.tokens,
     wikitext.revision.parent.tokens,
     name="revision.diff.tokens_change"),
 sub(wikitext.revision.words,
     wikitext.revision.parent.words,
     name="revision.diff.words_change"),
 # Named "markups_change": this entry subtracts the parent's markups from
 # the revision's, and "revision.diff.words_change" is already the name of
 # the words entry directly above, so reusing it gave two different
 # features the same name.
 sub(wikitext.revision.markups,
     wikitext.revision.parent.markups,
     name="revision.diff.markups_change"),
 sub(wikitext.revision.headings,
     wikitext.revision.parent.headings,
     name="revision.diff.headings_change"),
 sub(wikitext.revision.external_links,
     wikitext.revision.parent.external_links,
Example #9
0
# Sentiment polarity features.  Both are declared with sentiment_score as
# their only dependency and float as their return type.
positive_polarity = Feature("english.sentiment.revision.positive_polarity",
                            get_positive_score,
                            depends_on=[sentiment_score],
                            returns=float)

negative_polarity = Feature("english.sentiment.revision.negative_polarity",
                            get_negative_score,
                            depends_on=[sentiment_score],
                            returns=float)

# positive_polarity and negative_polarity combined through sub().
diff_polarity = sub(
    positive_polarity,
    negative_polarity,
    name="english.sentiment.revision.diff_polarity",
)

char_based = [
    wikitext.revision.chars,
    wikitext.revision.whitespace_chars,
    wikitext.revision.markup_chars,
    wikitext.revision.cjk_chars,
    wikitext.revision.entity_chars,
    wikitext.revision.url_chars,
    wikitext.revision.word_chars,
    wikitext.revision.uppercase_word_chars,
    wikitext.revision.punctuation_chars,
    wikitext.revision.break_chars,
    wikitext.revision.longest_repeated_char,
    wikitext.revision.whitespace_chars / max(wikitext.revision.chars, 1),
Example #10
0
"`float` : A ratio of important translations descriptions in the revision"

# Datasource built by _process_all_sources from `item`; its length is the
# all_sources feature.
all_sources_datasource = Datasource(
    name + ".all_sources",
    _process_all_sources,
    depends_on=[item],
)
all_sources = aggregators.len(all_sources_datasource)
"`int` : A count of all sources in the revision"

# Datasource derived from all_sources_datasource by
# _process_wikimedia_sources; its length is the all_wikimedia_sources feature.
all_wikimedia_sources_datasource = Datasource(
    name + ".all_wikimedia_sources", _process_wikimedia_sources,
    depends_on=[all_sources_datasource])
all_wikimedia_sources = aggregators.len(all_wikimedia_sources_datasource)
"`int` : A count of all sources which come from Wikimedia projects in the revision"

# all_sources and all_wikimedia_sources combined through modifiers.sub();
# no explicit feature name is passed.
all_external_sources = modifiers.sub(all_sources, all_wikimedia_sources)
"A count of all sources which do not come from Wikimedia projects in the revision"

# The denominator is modifiers.max(sources, 1) rather than sources itself.
external_sources_ratio = (
    all_external_sources
    / modifiers.max(wikibase_features.revision.sources, 1)
)
"A ratio/division between number of external references and number of claims that have references in the revision"

unique_sources = Feature(
    name + ".unique_sources",
    _process_unique_sources,
    depends_on=[all_sources_datasource],
    returns=int,
)
"`int` : A count of unique sources in the revision"

# Status
is_human = revision.has_property_value(properties.INSTANCE_OF,
                                       items.HUMAN,