Example #1
import sys

import docopt
from revscoring.scorer_models import MLScorerModel
from wikiclass import score  # assumed import; the snippet calls score() unqualified


def main(argv=None):
    args = docopt.docopt(__doc__, argv=argv)

    scorer_model = MLScorerModel.load(open(args['<model-file>'], 'rb'))
    if args['<text>'] == "<stdin>":
        text = sys.stdin.read()
    else:
        text = open(args['<text>']).read()

    print(score(scorer_model, text))
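
For reference, the docopt keys used above imply a usage pattern along these lines in the module docstring. This is a hypothetical sketch (the original __doc__ and program name are not shown):

"""Score a text with a trained model.

Usage:
    score_text <model-file> <text>

Pass the literal string "<stdin>" as <text> to read the text from standard
input instead of a file.
"""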
Example #2
import sys

import docopt
import mysqltsv
from revscoring.scorer_models import MLScorerModel


def main(argv=None):
    args = docopt.docopt(__doc__, argv=argv)
    periods = mysqltsv.read(sys.stdin, types=[int, str, int, int, int])
    page_periods = {p.page_id: (p.start_rev_id, p.end_rev_id) for p in periods}

    scorer_model = MLScorerModel.load(open(args['<model-file>'], 'rb'))

    dump_paths = args['<dump-file>']

    # run() is defined elsewhere in the original module.
    run(page_periods, scorer_model, dump_paths)
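
The mysqltsv.read call above expects a tab-separated stream on stdin with a header row and five typed columns, of which only page_id, start_rev_id, and end_rev_id are used. A hypothetical input (tab-separated; every column name except those three is an assumption) might look like:

page_id	page_title	start_rev_id	end_rev_id	n_revisions
12345	Example_page	1001	2002	17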
Example #3
import json
import logging
import sys

import docopt
import mwapi
import mysqltsv
from revscoring.extractors import api  # assumed import path for api.Extractor
from revscoring.scorer_models import MLScorerModel


def main():
    args = docopt.docopt(__doc__)

    logging.basicConfig(
        level=logging.DEBUG if args['--debug'] else logging.INFO,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s')
    logging.getLogger('requests').setLevel(logging.WARNING)

    rev_ids = (int(r.rev_id) for r in mysqltsv.read(sys.stdin))

    # Models are stored in binary form, so open in 'rb' mode as in the
    # other examples.
    scorer_model = MLScorerModel.load(open(args['<model-file>'], 'rb'))
    session = mwapi.Session(
        args['--host'], user_agent="Anon bias study <*****@*****.**>")
    extractor = api.Extractor(session)
    # ScoreProcessor and run() are defined elsewhere in the original project.
    score_processor = ScoreProcessor(scorer_model, extractor)

    # --cache optionally carries pre-computed values as a JSON object.
    cache = json.loads(args['--cache'] or "{}")

    verbose = args['--verbose']
    debug = args['--debug']

    run(rev_ids, score_processor, cache, verbose, debug)
Example #6
# This parser uses wikiclass to assess the quality of each page.
import wikiclass
from revscoring.scorer_models import MLScorerModel

columns = ['title', 'quality']

scorer_model = MLScorerModel.load(
    open("models/enwiki.nettrom_wp10.gradient_boosting.model", "rb"))


def parse(page):
    res = []
    empty = True
    # Iterate to the last revision; `rev` keeps its final value after the loop.
    for rev in page:
        empty = False
    if empty:
        return res
    res.append(page.title)
    # Score the latest revision's text and keep the predicted quality class.
    res.append(wikiclass.score(scorer_model, rev.text)['prediction'])
    return [res]
Example #7
#!/home/ellery/miniconda3/bin/python

import wikiclass
from revscoring.scorer_models import MLScorerModel
import requests
from collections import Counter
import time
import os

model = MLScorerModel.load(
    open("/home/west1/github/wikiclass/models/enwiki.wp10.rf.model", "rb"))

datadir = os.environ[
    'HOME'] + '/wikimedia/trunk/hoaxes/data/all_relevant_article_creation_content/'

print('\t'.join(['title', 'Stub', 'B', 'C', 'FA', 'Start', 'GA']))
# Score every .txt file in datadir and print one TSV row of class
# probabilities per file.
for f in os.listdir(datadir):
    if f.endswith(".txt"):
        with open(datadir + f, 'r') as markup_file:
            markup = markup_file.read()
            obj = wikiclass.score(model, markup)
            print('\t'.join([
                f,
                str(obj['probability']['Stub']),
                str(obj['probability']['B']),
                str(obj['probability']['C']),
                str(obj['probability']['FA']),
                str(obj['probability']['Start']),
                str(obj['probability']['GA'])
            ]))
    else:
        continue
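
Both of the last two examples depend on the shape of the dict returned by wikiclass.score. A minimal sketch of that shape, with the keys inferred from the accesses above (the predicted label and the numbers are illustrative only):

obj = wikiclass.score(model, markup)
# obj looks roughly like:
# {'prediction': 'Stub',
#  'probability': {'Stub': 0.71, 'Start': 0.18, 'C': 0.06,
#                  'B': 0.03, 'GA': 0.01, 'FA': 0.01}}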