def main(argv=None):
    """Score a piece of text with a serialized model and print the result.

    Command-line arguments are parsed by docopt from this module's
    docstring.  The model is loaded from ``<model-file>``; the text comes
    from standard input when ``<text>`` is the literal string ``"<stdin>"``
    and from the file at that path otherwise.

    :Parameters:
        argv : `list` ( `str` ) | `None`
            Argument vector forwarded unchanged to ``docopt.docopt``.
    """
    args = docopt.docopt(__doc__, argv=argv)

    # Use context managers so both file handles are released as soon as
    # their content has been consumed, even if loading/reading raises.
    with open(args['<model-file>'], 'rb') as model_file:
        scorer_model = MLScorerModel.load(model_file)

    if args['<text>'] == "<stdin>":
        text = sys.stdin.read()
    else:
        with open(args['<text>']) as text_file:
            text = text_file.read()

    print(score(scorer_model, text))
def main(argv=None):
    """Read page periods from stdin, load the model, and hand off to `run`.

    Standard input is parsed with ``mysqltsv.read`` using five typed
    columns (int, str, int, int, int).  Each row's ``page_id`` is mapped to
    its ``(start_rev_id, end_rev_id)`` pair; if a page id repeats, the last
    row read wins because the mapping is a plain dict.

    :Parameters:
        argv : `list` ( `str` ) | `None`
            Argument vector forwarded unchanged to ``docopt.docopt``.
    """
    args = docopt.docopt(__doc__, argv=argv)

    periods = mysqltsv.read(sys.stdin, types=[int, str, int, int, int])
    page_periods = {
        period.page_id: (period.start_rev_id, period.end_rev_id)
        for period in periods
    }

    # Close the model file once it has been deserialized instead of
    # leaking the handle for the life of the process.
    with open(args['<model-file>'], 'rb') as model_file:
        scorer_model = MLScorerModel.load(model_file)

    dump_paths = args['<dump-file>']

    run(page_periods, scorer_model, dump_paths)
def main():
    """Configure logging, build the scoring pipeline, and call `run`.

    Steps, in order:

    1. Parse command-line arguments with docopt from the module docstring.
    2. Set the root log level to DEBUG when ``--debug`` is given, INFO
       otherwise, and raise the ``requests`` logger to WARNING.
    3. Lazily read revision ids (the ``rev_id`` column, cast to int) from
       standard input.
    4. Load the scorer model from ``<model-file>``.
    5. Build an ``mwapi.Session`` for ``--host``, wrap it in an
       ``api.Extractor``, and combine both into a ``ScoreProcessor``.
    6. Decode ``--cache`` as JSON (an empty object when the option is
       missing or empty) and pass everything to ``run``.
    """
    args = docopt.docopt(__doc__)

    logging.basicConfig(
        level=logging.DEBUG if args['--debug'] else logging.INFO,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s'
    )
    logging.getLogger('requests').setLevel(logging.WARNING)

    # Generator: rows are pulled from stdin only as `run` consumes them.
    rev_ids = (int(r.rev_id) for r in mysqltsv.read(sys.stdin))

    # Open in binary mode (serialized models are binary data) and close the
    # handle as soon as the model has been loaded.
    with open(args['<model-file>'], 'rb') as model_file:
        scorer_model = MLScorerModel.load(model_file)

    session = mwapi.Session(
        args['--host'],
        user_agent="Anon bias study <*****@*****.**>")
    extractor = api.Extractor(session)
    score_processor = ScoreProcessor(scorer_model, extractor)

    cache = json.loads(args['--cache'] or "{}")
    verbose = args['--verbose']
    debug = args['--debug']

    run(rev_ids, score_processor, cache, verbose, debug)
def main():
    """Entry point: parse options, assemble a score processor, call `run`.

    Logging goes through ``logging.basicConfig`` at DEBUG level when
    ``--debug`` is set (INFO otherwise); the ``requests`` logger is limited
    to WARNING and above.  Revision ids are read lazily from standard
    input via ``mysqltsv.read``, the model is loaded from ``<model-file>``,
    and an ``api.Extractor`` is built on an ``mwapi.Session`` for
    ``--host``.  ``--cache`` is decoded as JSON, defaulting to an empty
    object when the option is absent or empty.
    """
    args = docopt.docopt(__doc__)
    debug = args['--debug']
    verbose = args['--verbose']

    logging.basicConfig(
        level=logging.DEBUG if debug else logging.INFO,
        format='%(asctime)s %(levelname)s:%(name)s -- %(message)s'
    )
    logging.getLogger('requests').setLevel(logging.WARNING)

    # Lazy: nothing is read from stdin until `run` iterates over rev_ids.
    rev_ids = (int(row.rev_id) for row in mysqltsv.read(sys.stdin))

    # Binary mode matches how serialized models are read elsewhere, and the
    # context manager guarantees the handle is closed after loading.
    with open(args['<model-file>'], 'rb') as model_file:
        scorer_model = MLScorerModel.load(model_file)

    session = mwapi.Session(
        args['--host'],
        user_agent="Anon bias study <*****@*****.**>")
    score_processor = ScoreProcessor(scorer_model, api.Extractor(session))

    cache = json.loads(args['--cache'] or "{}")

    run(rev_ids, score_processor, cache, verbose, debug)
# This parser uses wikiclass to assess the quality of each page.
import wikiclass
from revscoring.scorer_models import MLScorerModel

# Names of the two values `parse` emits per row, in order.
columns = ['title', 'quality']

# Load the model once at import time; the context manager closes the file
# handle as soon as deserialization is done.
with open("models/enwiki.nettrom_wp10.gradient_boosting.model",
          "rb") as _model_file:
    scorer_model = MLScorerModel.load(_model_file)

# Marks "the page yielded no revisions" without relying on truthiness of
# whatever objects the page iterator produces.
_NO_REVISION = object()


def parse(page):
    """Return the predicted quality class for the last revision of a page.

    The page is iterated to exhaustion and only the final revision it
    yields is scored.

    :Parameters:
        page : iterable
            Yields revision objects exposing ``text``; the page itself
            must expose ``title``.

    :Returns:
        ``[]`` when the page yields no revisions, otherwise a one-row list
        ``[[title, prediction]]`` where ``prediction`` is the
        ``'prediction'`` entry of ``wikiclass.score``'s result.
    """
    last_rev = _NO_REVISION
    for last_rev in page:
        # Nothing to do per revision; we only want the final one.
        pass

    if last_rev is _NO_REVISION:
        return []

    title = page.title
    quality = wikiclass.score(scorer_model, last_rev.text)['prediction']
    return [[title, quality]]
#!/home/ellery/miniconda3/bin/python import wikiclass from revscoring.scorer_models import MLScorerModel import requests from collections import Counter import time import os model = MLScorerModel.load( open("/home/west1/github/wikiclass/models/enwiki.wp10.rf.model", "rb")) datadir = os.environ[ 'HOME'] + '/wikimedia/trunk/hoaxes/data/all_relevant_article_creation_content/' print('\t'.join(['title', 'Stub', 'B', 'C', 'FA', 'Start', 'GA'])) for f in os.listdir(datadir): if f.endswith(".txt"): with open(datadir + f, 'r') as markup_file: markup = markup_file.read() obj = wikiclass.score(model, markup) print('\t'.join([ f, str(obj['probability']['Stub']), str(obj['probability']['B']), str(obj['probability']['C']), str(obj['probability']['FA']), str(obj['probability']['Start']), str(obj['probability']['GA']) ])) else:
#!/home/ellery/miniconda3/bin/python
# Scores every ``.txt`` markup file in ``datadir`` with a wikiclass model
# and prints one tab-separated row of class probabilities per file.
import wikiclass
from revscoring.scorer_models import MLScorerModel
import requests
from collections import Counter
import time
import os

# Class labels, in output-column order.  Keeping them in one place ensures
# the header row and the data rows cannot drift apart.
LABELS = ['Stub', 'B', 'C', 'FA', 'Start', 'GA']

# Load the model once; the context manager closes the file handle as soon
# as deserialization is done instead of leaking it.
with open("/home/west1/github/wikiclass/models/enwiki.wp10.rf.model",
          "rb") as model_file:
    model = MLScorerModel.load(model_file)

datadir = os.environ['HOME'] + \
    '/wikimedia/trunk/hoaxes/data/all_relevant_article_creation_content/'

# Header row.
print('\t'.join(['title'] + LABELS))

for f in os.listdir(datadir):
    # Only plain-text markup files are scored; skip everything else.
    if not f.endswith(".txt"):
        continue

    with open(os.path.join(datadir, f), 'r') as markup_file:
        markup = markup_file.read()

    obj = wikiclass.score(model, markup)

    # One row per file: file name followed by the probability of each label.
    print('\t'.join(
        [f] + [str(obj['probability'][label]) for label in LABELS]))