コード例 #1
0
ファイル: model.py プロジェクト: accraze/inference-services
 def load(self):
     """Load the enwiki goodfaith model and build a revscoring extractor.

     Sets self.model, self.extractor and flips self.ready so the serving
     framework knows the predictor can accept requests.
     """
     model_path = "enwiki.goodfaith.gradient_boosting.model"
     with open(model_path) as model_file:
         self.model = Model.load(model_file)
     mw_session = mwapi.Session("https://en.wikipedia.org",
                                user_agent="KFServing revscoring demo")
     self.extractor = api.Extractor(mw_session)
     self.ready = True
コード例 #2
0
def extract_features(label_file, context):
    """Extract damaging/goodfaith features for the revisions in *label_file*.

    :param label_file: path to a file of JSON-encoded revision ids,
        one per line (read via ``load_labels``).
    :param context: wiki database name, e.g. ``"enwiki"``; the ``"wiki"``
        suffix is stripped to build the API host.
    :return: whatever ``extract`` yields for the dependents/rev_ids pair.
    """
    rev_ids = [json.loads(label) for label in load_labels(label_file)]

    session = mwapi.Session(
        host="https://{0}.wikipedia.org".format(context.replace("wiki", "")),
        user_agent="Ores bias analysis project by Nate TeBlunthuis <*****@*****.**>")

    dependent_names = [
        "editquality.feature_lists.{0}.damaging".format(context),
        "editquality.feature_lists.{0}.goodfaith".format(context)]
    dependents = []
    for dependent_path in dependent_names:
        # import_path may return a single Dependent or a list of them;
        # flatten both cases into one list.
        dependent_or_list = yamlconf.import_path(dependent_path)
        if isinstance(dependent_or_list, Dependent):
            dependents.append(dependent_or_list)
        else:
            dependents.extend(dependent_or_list)

    extractor = api.Extractor(session)
    # os.cpu_count() can return None, and on a single-core box
    # cpu_count() - 1 would request zero extractor processes; clamp to >= 1.
    n_extractors = max(1, (os.cpu_count() or 2) - 1)
    features = extract(dependents, rev_ids, extractor, extractors=n_extractors)
    return features
コード例 #3
0
def main():
    """CLI entry point: score revision ids read from stdin.

    Parses docopt arguments, configures logging, loads the scorer model,
    builds an API-backed extractor, then hands everything to ``run``.
    """
    args = docopt.docopt(__doc__)

    logging.basicConfig(
        level=logging.DEBUG if args['--debug'] else logging.INFO,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    # requests logs every connection at INFO; keep it quiet.
    logging.getLogger('requests').setLevel(logging.WARNING)

    # Lazy generator: rows are read from stdin as they are consumed.
    rev_ids = (int(r.rev_id) for r in mysqltsv.read(sys.stdin))

    # Close the model file promptly instead of leaking the open handle.
    with open(args['<model-file>']) as model_file:
        scorer_model = MLScorerModel.load(model_file)
    session = mwapi.Session(
        args['--host'], user_agent="Anon bias study <*****@*****.**>")
    extractor = api.Extractor(session)
    score_processor = ScoreProcessor(scorer_model, extractor)

    cache = json.loads(args['--cache'] or "{}")

    verbose = args['--verbose']
    debug = args['--debug']

    run(rev_ids, score_processor, cache, verbose, debug)
コード例 #4
0
def get_and_store_features(diff, url):
    """Extract damaging features for one diff and append them to a TSV file.

    Best-effort: extraction failures are reported and skipped so a batch
    run over many diffs keeps going.

    :param diff: revision id, as a string (also used verbatim in the
        output URL column).
    :param url: base URL of the wiki, e.g. ``"https://xx.wikia.com"``.
    """
    try:
        print(diff, url)
        session = mwapi.Session(
            url,
            api_path='/api.php',
            user_agent='Cynthia - Vandalism detection bot, @noreplyz')
        api_extractor = api.Extractor(session)

        with open('20k-features-damaging-2.tsv', 'a') as f:
            features = [str(fea) for fea in
                        api_extractor.extract(int(diff), damaging)]
            f.write(url + '/wiki/?diff=' + diff + '\t' + '\t'.join(features) +
                    '\n')

    except RevisionNotFound:
        print('Revision not found.')
        return
    except Exception as error:
        # Deliberate best-effort catch-all so one broken wiki doesn't abort
        # the batch — but surface the actual error so it can be diagnosed.
        print('Wiki closed, or other issue.', error)
        return
コード例 #5
0
    english.badwords.revision.diff.match_prop_delta_sum,
    # Measures the proportional change in "informals"
    english.informals.revision.diff.match_prop_delta_sum,
    # Measures the proportional change meaningful words
    english.stopwords.revision.diff.non_stopword_prop_delta_sum,
    # Is the user anonymous
    revision_oriented.revision.user.is_anon,
    # Is the user a bot or a sysop
    revision_oriented.revision.user.in_group({'bot', 'sysop'}),
    # How long ago did the user register?
    temporal.revision.user.seconds_since_registration
]

# Accumulators for the revision ids assigned to the training and test
# splits (populated further down in the script).
trainingRevId = []
testRevId = []
# Feature extractor backed by the mwapi session configured earlier in
# this script (session is defined above this chunk).
api_extractor = api.Extractor(session)
"""
sample = []
with open('datasample.csv') as csv_file:
    data_csv_reader = csv.reader(csv_file, delimiter=',')
    for row in data_csv_reader:
        if row != []:
            sample.append(row[1])

sampleData = []
sampleInfo = []
for revid in sample:
    revid = int(revid)
    try:
        #print("https://en.wikipedia.org/wiki/?diff={0}".format(revid))
        sampleRevData = list(api_extractor.extract(revid, features))
コード例 #6
0
import mwapi
from revscoring import ScorerModel
from revscoring.extractors import api

# Demo: load the enwiki damaging model and score a single revision.
with open("models/enwiki.damaging.linear_svc.model") as f:
    model = ScorerModel.load(f)

session = mwapi.Session(host="https://en.wikipedia.org",
                        user_agent="revscoring demo")
extractor = api.Extractor(session)
feature_values = extractor.extract(123456789, model.features)
print(model.score(feature_values))
コード例 #7
0
import mwapi
import bz2
from revscoring import Model
from revscoring.extractors import api

# Load the compressed draft-quality model; use a context manager so the
# bz2 file handle is closed instead of leaked after loading.
with bz2.open("models/ptwiki.draft_quality.gradient_boosting.model.bz2",
              "rb") as model_file:
    model = Model.load(model_file)

extractor = api.Extractor(
    mwapi.Session(host="https://pt.wikipedia.org",
                  user_agent="draftquality test"))
# Score one ptwiki revision as a smoke test of the loaded model.
values = extractor.extract(58071111, model.features)
print(model.score(values))
コード例 #8
0
def get_extractor(lang="de"):
    """Return a revscoring API extractor for the given Wikipedia language."""
    host = "https://%s.wikipedia.org" % lang
    mw_session = mwapi.Session(host, user_agent=USER_AGENT)
    return api.Extractor(mw_session)
コード例 #9
0
ファイル: scoring_handler.py プロジェクト: soap-team/cynthia
 def get_features(self, wiki, diff):
     """Extract the damaging feature values for a single diff on *wiki*.

     Uses the user agent from self.config; returns the extracted values
     as a list.
     """
     mw_session = mwapi.Session(wiki,
                                api_path='/api.php',
                                user_agent=self.config['user_agent'])
     extractor = api.Extractor(mw_session)
     feature_values = extractor.extract(int(diff), damaging)
     return list(feature_values)
コード例 #10
0
ファイル: test-classifier.py プロジェクト: soap-team/cynthia
def get_features(wiki, diff, featurelist):
    """Extract *featurelist* values for one diff on the given wiki domain."""
    host = 'https://' + wiki
    mw_session = mwapi.Session(host,
                               api_path='/api.php',
                               user_agent='Cynthia testing')
    extractor = api.Extractor(mw_session)
    return list(extractor.extract(int(diff), featurelist))